In [None]:
import json
import time
import uuid
from datetime import datetime

import requests
from pyspark.sql.functions import col
from pyspark.sql.types import DateType
from pyspark.sql.types import TimestampType

# Utility Functions

In [None]:
class Utility:
    """Stateless helpers for normalising nested JSON-like data."""

    @staticmethod
    def convert_int_to_float(data):
        """Recursively replace every ``int`` inside ``data`` with the equal ``float``.

        Args:
            data: Any value. Lists and dicts are walked recursively; dict keys
                are left untouched, only values are converted.

        Returns:
            A float for an int input, a new list / dict with converted items for
            list / dict inputs (the input is not mutated), and the value itself
            for every other type.

        Note:
            ``bool`` is a subclass of ``int``, so ``True`` / ``False`` come back
            as ``1.0`` / ``0.0``.
        """
        if isinstance(data, int):
            return float(data)
        if isinstance(data, list):
            return [Utility.convert_int_to_float(item) for item in data]
        if isinstance(data, dict):
            return {key: Utility.convert_int_to_float(value) for key, value in data.items()}
        # Strings, floats, None, tuples, ... pass through unchanged.
        return data


# Fetch Weather Data from the API

In [None]:
class Weather:
    """Fetches current weather for selected cities and appends it to the raw Delta table."""

    raw_schema = "RAW"
    raw_table = "raw_weather"

    # NOTE(review): the default `createdBy` is spelled 'Adhiarki' while LogTable's
    # default `created_by` is 'Adhikari' - confirm which spelling is intended.
    # The default list is never mutated here, so sharing it between calls is harmless.
    @staticmethod
    def get_and_load_raw(createdBy: str = 'Gokarna Adhiarki', selectedCities: list[str] = ['Kathmandu', 'Pokhara', 'Gorkha', 'Biratnagar', 'Kirtipur']):
        """Fetch weather for each selected city and append one raw row per city.

        Reads the city list from ``dbfs:/FileStore/tables/Cities``, keeps the
        rows whose ``name`` is in ``selectedCities`` and, for each of them, calls
        the weather API and appends the payload to ``RAW.raw_weather``.
        Progress is recorded through ``LogTable.load`` (STARTED, one EXTRACTING
        row per city, then COMPLETED, or FAILED if anything raises).

        Args:
            createdBy: Value stored in the ``created_by`` column of each raw row.
            selectedCities: City names to fetch; matched against the ``name``
                column of the Cities table.

        Returns:
            The string ``"Successfully Loaded !"`` when every city was written.

        Raises:
            Exception: Whatever the API call or the Spark write raised; it is
                re-raised unchanged after a FAILED log row has been written.
        """
        cities = (
            spark.read.load("dbfs:/FileStore/tables/Cities")
            .filter(col('name').isin(selectedCities))
            .select('name', 'lat', 'lon')
        )

        spark.sql(f"CREATE SCHEMA IF NOT EXISTS {Weather.raw_schema};")

        # The id of the STARTED log row is stamped on every raw row of this run.
        run_id = LogTable.load('RAW', 'STARTED', 'raw.raw_weather')
        try:
            for city in cities.collect():
                data = Weather.get_response(city['name'], city['lat'], city['lon'])
                record = {
                    'created_on': str(datetime.now()),
                    'created_by': createdBy,
                    'data': data,
                    'id': run_id,
                }

                # Serialise to real JSON: an RDD of dicts is read through each
                # dict's Python repr, which is not valid JSON for None/True/False
                # or for strings that contain both quote characters.
                json_rdd = spark.sparkContext.parallelize([json.dumps(record)])
                df = spark.read.json(json_rdd)

                LogTable.load('RAW', 'EXTRACTING', 'raw.raw_weather')
                (
                    df.write.format('delta')
                    .mode('append')
                    .option("mergeSchema", "true")
                    .saveAsTable(f"{Weather.raw_schema}.{Weather.raw_table}")
                )
        except Exception as exc:
            # Log only the exception class: HTTP error messages include the
            # request URL, and the URL carries the API key.
            LogTable.load('RAW', 'FAILED', 'raw.raw_weather', comments=type(exc).__name__)
            raise

        LogTable.load('RAW', 'COMPLETED', 'raw.raw_weather')

        return "Successfully Loaded !"

    @staticmethod
    def get_response(cityName: str, lat: float, lon: float, timeout: float = 30.0) -> dict:
        """Call the OpenWeatherMap current-weather endpoint for one location.

        Args:
            cityName: Stored in the returned payload under the ``city`` key.
            lat: Latitude sent as the ``lat`` query parameter.
            lon: Longitude sent as the ``lon`` query parameter.
            timeout: Seconds to wait for the server before giving up.

        Returns:
            The decoded JSON payload with ``city`` added and every int converted
            to float by ``Utility.convert_int_to_float``.

        Raises:
            requests.HTTPError: If the API answers with a 4xx/5xx status.
            requests.RequestException: On connection problems or timeout.
        """
        # NOTE(review): the key is hard-coded; it should come from a secret
        # store or the environment rather than from the notebook source.
        api_key = '*********'
        response = requests.get(
            "https://api.openweathermap.org/data/2.5/weather",
            params={'lat': lat, 'lon': lon, 'appid': api_key, 'units': 'metric'},
            timeout=timeout,
        )
        # Fail fast so an error payload is never appended to the raw table.
        response.raise_for_status()

        data = response.json()
        data['city'] = cityName

        return Utility.convert_int_to_float(data)

# Log Maintenance

In [None]:
class LogTable:
    """Appends pipeline status rows to a Delta log table (``FACT.log_table`` by default)."""

    schemaName = 'FACT'
    tableName = 'log_table'

    @staticmethod
    def load(load_type, status, load_to_table_name: str, comments='', created_by='Gokarna Adhikari'):
        """Write one status row to the log table and return its generated id.

        Args:
            load_type: Stored in the ``load_type`` column.
            status: Stored in the ``status`` column.
            load_to_table_name: Name of the table being loaded; stored in the
                ``table_name`` column.
            comments: Free-text note stored in the ``comments`` column.
            created_by: Stored in the ``created_by`` column.

        Returns:
            The freshly generated UUID string used as the row's ``id``. Every
            call produces a new id.
        """
        log_id = str(uuid.uuid4())
        # Capture the clock once so all timestamp columns of the row agree.
        now = str(datetime.now())
        LogTable.load_table(
            LogTable.schemaName,
            LogTable.tableName,
            id=log_id,
            load_type=load_type,
            table_name=load_to_table_name,
            process_start_time=now,
            process_end_time=now,
            status=status,
            comments=comments,
            start_date_time=now,
            end_date_time=now,
            created_on=now,
            created_by=created_by,
        )
        return log_id

    @staticmethod
    def load_table(schema: str, table: str, **kwargs):
        """Append ``kwargs`` as a single row to the Delta table ``schema.table``.

        Args:
            schema: Target schema; created if it does not exist.
            table: Target table name inside ``schema``.
            **kwargs: Column name / value pairs of the row. Must contain ``id``
                and the five timestamp columns cast below.

        Returns:
            ``kwargs['id']``.
        """
        spark.sql(f"CREATE SCHEMA IF NOT EXISTS {schema};")

        # Serialise to real JSON: an RDD of dicts is read through each dict's
        # Python repr, which breaks on None/True/False and on strings that
        # contain both quote characters. default=str keeps values such as
        # datetime objects serialisable.
        json_rdd = spark.sparkContext.parallelize([json.dumps(kwargs, default=str)])
        df = spark.read.json(json_rdd)

        # JSON has no timestamp type, so these arrive as strings and are cast
        # explicitly before writing.
        timestamp_cols = ["process_start_time", "process_end_time", "start_date_time", "end_date_time", "created_on"]
        for col_name in timestamp_cols:
            df = df.withColumn(col_name, col(col_name).cast(TimestampType()))

        df.write.format('delta').mode('append').option("mergeSchema", "true").saveAsTable(f"{schema}.{table}")

        return kwargs['id']
