In [0]:
from pyspark.sql.functions import lit, col

#### Loading data from a csv file

In [0]:
df = spark.read.format("csv").option("header", "true").load("dbfs:/FileStore/shared_uploads/thakuranand117@gmail.com/Weather_data.csv")

In [0]:
# Adding a new column to check for forecasted data and adding false to actual data
df = df.withColumn('Forecasted', lit('False'))
display(df)

id,city_name,lon,lat,date,time,temp,temp_min,temp_max,pressure,humidity,dt,timezone,visibility,wind_deg,wind_speed,wind_gust,clouds_all,Forecasted
1282616,Wali?,83.76667,27.98333,2023-06-08 12:17:32,12:17:32,312.06,312.06,312.06,1001,14,1686205953,20700,10000,146.0,2.65,3.37,8.0,False
1282621,Upardang Gadhi,84.566666,27.766666,2023-06-08 12:17:32,12:17:32,310.8,310.8,310.8,1000,14,1686205955,20700,10000,210.0,4.1,4.28,9.0,False
1282635,Tulsipur,82.297256,28.130989,2023-06-08 12:17:32,12:17:32,310.78,310.78,310.78,1003,17,1686205411,20700,10000,227.0,6.24,5.87,5.0,False
1282665,Tikoli,84.5,27.633333,2023-06-08 12:17:32,12:17:32,315.97,315.97,315.97,1001,13,1686205413,20700,10000,206.0,3.95,3.55,2.0,False
1282666,?ikapur,81.133331,28.5,2023-06-08 12:17:32,12:17:32,314.91,314.91,314.91,1001,12,1686205415,20700,10000,252.0,3.95,2.73,0.0,False
1282616,Wali?,83.76667,27.98333,2023-06-08 13:18:39,13:18:39,312.44,312.44,312.44,1000,14,1686209620,20700,10000,155.0,2.75,3.24,12.0,False
1282621,Upardang Gadhi,84.566666,27.766666,2023-06-08 13:18:39,13:18:39,311.1,311.1,311.1,999,13,1686209622,20700,10000,208.0,3.79,4.64,12.0,False
1282635,Tulsipur,82.297256,28.130989,2023-06-08 13:18:39,13:18:39,311.66,311.66,311.66,1001,15,1686209624,20700,10000,231.0,5.56,6.25,35.0,False
1282665,Tikoli,84.5,27.633333,2023-06-08 13:18:39,13:18:39,316.94,316.94,316.94,998,12,1686209625,20700,10000,202.0,3.89,4.71,2.0,False
1282666,?ikapur,81.133331,28.5,2023-06-08 13:18:39,13:18:39,316.76,316.76,316.76,999,10,1686209627,20700,10000,219.0,3.05,2.59,0.0,False


In [0]:
def forecast_one_hour(df):
    df.createOrReplaceTempView('weather_data')
    df_forecasted = spark.sql("""
        with cte as (
            select 
              *,
              ROW_NUMBER() OVER (PARTITION BY city_name ORDER BY time desc) as time_order
            from 
              weather_data
            ), cte2 as (
            select distinct
              id,
              city_name,
              lon,
              lat,
              (max(date) over (partition by id)) + interval '1 hour' as dates,
              DATE_FORMAT(TO_TIMESTAMP(dates), 'HH:mm:ss') as times,
              avg(temp) OVER (PARTITION BY id) as temp,
              avg(temp_min) OVER (PARTITION BY id) as temp_min,
              avg(temp_max) OVER (PARTITION BY id) as temp_max,
              avg(pressure) OVER (PARTITION BY id) as pressure,
              avg(humidity) OVER (PARTITION BY id) as humidity,
              dt,
              timezone,
              avg(visibility) OVER (PARTITION BY id) as visibility,
              avg(wind_deg) OVER (PARTITION BY id) as wind_deg,
              avg(wind_speed) OVER (PARTITION BY id) as wind_speed,
              avg(wind_gust) OVER (PARTITION BY id) as wind_gust,
              avg(clouds_all) OVER (PARTITION BY id) as cloud_all,
              'True' as Forecasted,
              time_order
            from cte 
            where time_order < 5 
            )
            select * from cte2 where time_order=1;
    """)
    return df_forecasted.drop('time_order')

In [0]:
display(forecast_one_hour())

id,city_name,lon,lat,dates,"date_format(to_timestamp(lateralAliasReference(dates)), HH:mm:ss)",temp,temp_min,temp_max,pressure,humidity,dt,timezone,visibility,visibility.1,wind_deg,wind_speed,wind_gust,cloud_all,Forecasted
1282616,Wali?,83.76667,27.98333,2023-06-08 18:22:03,18:22:03,311.02500000000003,311.02500000000003,311.02500000000003,999.0,15.25,1686224225,20700,10000,10000.0,200.5,3.905,4.1075,20.0,True
1282621,Upardang Gadhi,84.566666,27.766666,2023-06-08 18:22:03,18:22:03,310.0925,310.0925,310.0925,997.25,13.0,1686224227,20700,10000,10000.0,203.25,3.39,4.48,11.0,True
1282635,Tulsipur,82.297256,28.130989,2023-06-08 18:22:03,18:22:03,310.605,310.605,310.605,1000.25,15.25,1686224228,20700,10000,10000.0,233.75,3.1175,4.02,36.5,True
1282665,Tikoli,84.5,27.633333,2023-06-08 18:22:03,18:22:03,316.0225,316.0225,316.0225,996.5,11.25,1686224230,20700,10000,10000.0,205.75,3.055,3.93,1.0,True
1282666,?ikapur,81.133331,28.5,2023-06-08 18:22:03,18:22:03,316.40250000000003,316.40250000000003,316.40250000000003,996.75,9.0,1686224232,20700,10000,10000.0,241.75,2.4225,2.6125,0.0,True


In [0]:
# Forecasting for coming 4 hours
def forecast_n_hour(df, n):
    for i in range(0,n):
        df_forecasted = forecast_one_hour(df)
        df = df.union(df_forecasted)
    return df
df_updated = forecast_n_hour(df,4)
display(df_updated)

id,city_name,lon,lat,date,time,temp,temp_min,temp_max,pressure,humidity,dt,timezone,visibility,wind_deg,wind_speed,wind_gust,clouds_all,Forecasted
1282616,Wali?,83.76667,27.98333,2023-06-08 12:17:32,12:17:32,312.06,312.06,312.06,1001.0,14.0,1686205953,20700,10000.0,146.0,2.65,3.37,8.0,False
1282621,Upardang Gadhi,84.566666,27.766666,2023-06-08 12:17:32,12:17:32,310.8,310.8,310.8,1000.0,14.0,1686205955,20700,10000.0,210.0,4.1,4.28,9.0,False
1282635,Tulsipur,82.297256,28.130989,2023-06-08 12:17:32,12:17:32,310.78,310.78,310.78,1003.0,17.0,1686205411,20700,10000.0,227.0,6.24,5.87,5.0,False
1282665,Tikoli,84.5,27.633333,2023-06-08 12:17:32,12:17:32,315.97,315.97,315.97,1001.0,13.0,1686205413,20700,10000.0,206.0,3.95,3.55,2.0,False
1282666,?ikapur,81.133331,28.5,2023-06-08 12:17:32,12:17:32,314.91,314.91,314.91,1001.0,12.0,1686205415,20700,10000.0,252.0,3.95,2.73,0.0,False
1282616,Wali?,83.76667,27.98333,2023-06-08 13:18:39,13:18:39,312.44,312.44,312.44,1000.0,14.0,1686209620,20700,10000.0,155.0,2.75,3.24,12.0,False
1282621,Upardang Gadhi,84.566666,27.766666,2023-06-08 13:18:39,13:18:39,311.1,311.1,311.1,999.0,13.0,1686209622,20700,10000.0,208.0,3.79,4.64,12.0,False
1282635,Tulsipur,82.297256,28.130989,2023-06-08 13:18:39,13:18:39,311.66,311.66,311.66,1001.0,15.0,1686209624,20700,10000.0,231.0,5.56,6.25,35.0,False
1282665,Tikoli,84.5,27.633333,2023-06-08 13:18:39,13:18:39,316.94,316.94,316.94,998.0,12.0,1686209625,20700,10000.0,202.0,3.89,4.71,2.0,False
1282666,?ikapur,81.133331,28.5,2023-06-08 13:18:39,13:18:39,316.76,316.76,316.76,999.0,10.0,1686209627,20700,10000.0,219.0,3.05,2.59,0.0,False
