In [23]:
import pandas as pd
import numpy as np
import requests
from datetime import datetime
import urllib

# Getting historical data

In [24]:
# Coordinates of Antwerp, used as the query location for the weather API
# (sent as the `latitude` / `longitude` request parameters).
LATITUDE = 51.22
LONGITUDE = 4.40


In [25]:
def get_historical_weather(from_date, to_date, longitude, latitude, timeout=120):
    """Download daily and hourly historical weather from the Open-Meteo archive API.

    Parameters
    ----------
    from_date, to_date : str
        First and last day of the requested range, sent as the ``start_date``
        and ``end_date`` query parameters (the caller in this notebook passes
        ``YYYY-MM-DD`` strings).
    longitude, latitude : float
        Location to query.
    timeout : float, optional
        Seconds to wait for the server before giving up. Without a timeout a
        stalled connection would hang the notebook indefinitely.

    Returns
    -------
    dict
        The decoded JSON response. The rest of the notebook reads its
        ``'hourly'`` and ``'daily'`` entries.

    Raises
    ------
    requests.HTTPError
        If the API answers with a 4xx/5xx status. Previously the error payload
        was returned as if it were data and only failed later with a KeyError.
    """
    # Imported explicitly: a bare `import urllib` does not guarantee that the
    # `urllib.parse` submodule is loaded.
    from urllib.parse import urlencode

    daily = [
        'weathercode',
        'temperature_2m_max',
        'temperature_2m_mean',
        'sunrise',
        'sunset',
        'windspeed_10m_max',
        'winddirection_10m_dominant',
    ]
    hourly = [
        'temperature_2m',
        'cloudcover',
        'direct_radiation',
    ]
    data = {
        'latitude': latitude,
        'longitude': longitude,
        'start_date': from_date,
        'hourly': ','.join(hourly),
        'end_date': to_date,
        'daily': ','.join(daily),
        'timezone': 'auto',
    }
    # Encode the query ourselves so the commas separating variable names stay
    # literal instead of being percent-encoded.
    query = urlencode(data, safe=',')
    r = requests.get(
        "https://archive-api.open-meteo.com/v1/archive",
        params=query,
        timeout=timeout,
    )
    # Show the final URL so the exact request can be reproduced by hand.
    print(r.url)
    r.raise_for_status()
    return r.json()

In [26]:
def get_mean_daylight_temp(hourly_temp_df,sunrise_df, sunset_df):
    """Aggregate hourly weather samples over the daylight hours of each day.

    Note that, despite the function name, each day's aggregate is the
    *median* (``np.median``) of the samples, not the arithmetic mean.

    Parameters
    ----------
    hourly_temp_df : pandas.DataFrame
        Chronologically ordered samples with the columns ``'time'``,
        ``'temperature_2m'``, ``'cloudcover'`` and ``'direct_radiation'``.
        The frame is only read; it is no longer modified in place.
    sunrise_df, sunset_df : pandas.Series
        One sunrise / sunset timestamp per day, in the same order. Both are
        accessed by position.

    Returns
    -------
    tuple of (list, list, list)
        Per-day medians of temperature, cloud cover and direct radiation.
        A sample belongs to a day when ``sunrise < time <= sunset``. A day is
        emitted only once the first sample after its sunset is seen (that
        sample itself is discarded), so a final day whose data stops before
        sunset produces no entry. A day without any daylight sample yields NaN.
    """
    n_days = sunrise_df.shape[0]
    sun_idx = 0

    # Finished per-day aggregates.
    avg_temps = []
    cloudcover = []
    direct_radiation = []

    # Samples collected for the day currently being processed.
    day_temps = []
    day_cloudcover = []
    day_radiation = []

    samples = zip(
        hourly_temp_df['time'],
        hourly_temp_df['temperature_2m'],
        hourly_temp_df['cloudcover'],
        hourly_temp_df['direct_radiation'],
    )
    for time, temp, cloud, radiation in samples:
        if sun_idx >= n_days:
            # Every day has been closed; remaining samples are irrelevant.
            break

        if not (sunrise_df.iloc[sun_idx] < time):
            # Still night before the current day's sunrise.
            continue

        if time > sunset_df.iloc[sun_idx]:
            # First sample past sunset: close the current day and move on.
            avg_temps.append(np.median(day_temps))
            cloudcover.append(np.median(day_cloudcover))
            direct_radiation.append(np.median(day_radiation))
            sun_idx += 1
            day_temps = []
            day_cloudcover = []
            day_radiation = []
            continue

        day_temps.append(temp)
        day_cloudcover.append(cloud)
        day_radiation.append(radiation)

    return avg_temps, cloudcover, direct_radiation

In [27]:
# Load the raw meter readings. The CSV path is relative to the notebook's
# working directory.
train_df = pd.read_csv('./PV_Elec_Gas3.csv')
# Preview the first rows to check the columns and the date format.
train_df.head()

Unnamed: 0,date,Cumulative_solar_power,kWh electricity/day,Gas/day
0,26/10/2011,0.1,15.1,9.0
1,27/10/2011,10.2,7.4,9.2
2,28/10/2011,20.2,5.8,8.0
3,29/10/2011,29.6,4.9,6.6
4,30/10/2011,34.2,11.7,5.3


On the first day, the energy produced looks extremely low, perhaps because the solar panels were installed that day, so I will drop that row.

In [28]:
def preprocess(df):
    """Build the daily feature table from the raw cumulative solar-production log.

    Steps:
      1. Convert ``date`` from ``DD/MM/YYYY`` to ``YYYY-MM-DD`` strings.
      2. Derive ``solar_produced_per_day`` as the day-to-day difference of
         ``Cumulative_solar_power`` and discard the first row, which has no
         previous day to difference against.
      3. Download weather for the remaining date range via
         ``get_historical_weather`` (uses the module-level ``LONGITUDE`` and
         ``LATITUDE``) and attach daylight length in minutes, daylight
         aggregates from ``get_mean_daylight_temp``, weather code, maximum
         wind speed and maximum temperature.
      4. Drop the raw columns that are no longer needed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``date``, ``Cumulative_solar_power``,
        ``kWh electricity/day`` and ``Gas/day``. The frame is not modified;
        a processed copy is returned.

    Returns
    -------
    pandas.DataFrame
        One row per day, indexed ``0..n-1``.

    Raises
    ------
    ValueError
        If the weather API does not return exactly one daily record per
        remaining input row (for example when the input dates are not
        consecutive). Weather columns are attached by position, so a mismatch
        would otherwise pair measurements with the wrong day.
    """
    # Work on a copy so the caller's frame is left intact and the function can
    # be called again on the same input.
    df = df.copy()

    df['date'] = pd.to_datetime(df['date'], format='%d/%m/%Y').dt.strftime('%Y-%m-%d')

    # Daily production is the increase of the cumulative counter.
    df['solar_produced_per_day'] = df['Cumulative_solar_power'].diff()
    # The first row has no predecessor (and looked implausibly low), so it is
    # removed by position rather than by index label.
    df = df.iloc[1:].reset_index(drop=True)

    weather_data = get_historical_weather(
        df['date'].iloc[0], df['date'].iloc[-1], LONGITUDE, LATITUDE
    )

    timestamp_format = '%Y-%m-%dT%H:%M'
    hourly_df = pd.DataFrame(weather_data['hourly'])
    hourly_df['time'] = pd.to_datetime(hourly_df['time'], format=timestamp_format)

    weather_df = pd.DataFrame(weather_data['daily'])
    if len(weather_df) != len(df):
        raise ValueError(
            f"Weather API returned {len(weather_df)} daily records for "
            f"{len(df)} input rows; the input dates must be consecutive."
        )

    sunrise = pd.to_datetime(weather_df['sunrise'], format=timestamp_format)
    sunset = pd.to_datetime(weather_df['sunset'], format=timestamp_format)

    # Length of the day in minutes.
    df['daylight'] = (sunset - sunrise) / np.timedelta64(1, 'm')
    df['daylight_avg_temp'], df['avg_cloudcover'], df['avg_direct_radiation'] = (
        get_mean_daylight_temp(hourly_df, sunrise, sunset)
    )
    df["weather"] = weather_df["weathercode"]
    df["max_wind_speed"] = weather_df['windspeed_10m_max']
    df["max_temp"] = weather_df['temperature_2m_max']

    return df.drop(columns=['Cumulative_solar_power', 'kWh electricity/day', 'Gas/day'])


In [29]:
# Build the feature table and rebind `train_df` to the returned frame.
# This performs a network request to the weather archive API (the requested
# URL is printed below).
train_df = preprocess(train_df)

https://archive-api.open-meteo.com/v1/archive?latitude=51.22&longitude=4.4&start_date=2011-10-27&hourly=temperature_2m,cloudcover,direct_radiation&end_date=2020-11-10&daily=weathercode,temperature_2m_max,temperature_2m_mean,sunrise,sunset,windspeed_10m_max,winddirection_10m_dominant&timezone=auto


In [30]:
# Persist the processed features next to the notebook. `index` is left at its
# default, so the row index is written as an extra first column.
train_df.to_csv('solar_panels.csv')