In [1]:
import numpy as np
import pandas as pd
from pvlib import solarposition, irradiance, atmosphere, location
from datetime import datetime, timedelta
from pathlib import Path
from data_preparation import create_time_features

def create_absolute_path(filename):
    """Return the location of ``filename`` inside the project's ``data`` folder.

    The project root is taken to be the parent of the current working
    directory, and the ``data`` folder lives directly beneath it. The folder
    is created (including missing parents) when it does not exist yet; the
    file itself is never touched.

    Args:
        filename: Name of the file to place under ``data``.

    Returns:
        pathlib.Path: ``<cwd parent>/data/<filename>``.
    """
    target_dir = Path.cwd().parent.joinpath('data')
    # exist_ok=True makes repeated calls harmless once the folder exists.
    target_dir.mkdir(parents=True, exist_ok=True)
    return target_dir.joinpath(filename)

def generate_timestamps(start_date, num_samples, freq='H'):
    """Return ``num_samples`` evenly spaced datetimes starting at ``start_date``.

    Args:
        start_date: First timestamp as a ``'YYYY-MM-DD'`` string (midnight).
        num_samples: Number of timestamps to produce; ``0`` yields ``[]``.
        freq: Spacing between consecutive timestamps, written as an optional
            positive integer followed by a unit alias: ``'W'`` (week),
            ``'D'`` (day), ``'H'``/``'h'`` (hour), ``'T'``/``'min'`` (minute)
            or ``'S'``/``'s'`` (second) -- e.g. ``'H'``, ``'15min'``, ``'2D'``.
            ``None`` is treated like the default ``'H'``.

    Returns:
        list[datetime.datetime]: The generated timestamps, in ascending order.

    Raises:
        ValueError: If ``start_date`` does not match ``'%Y-%m-%d'`` or ``freq``
            is not one of the supported aliases / has a zero multiplier.
    """
    # Length of one unit for every supported alias.
    unit_steps = {
        'W': timedelta(weeks=1),
        'D': timedelta(days=1),
        'H': timedelta(hours=1),
        'h': timedelta(hours=1),
        'T': timedelta(minutes=1),
        'min': timedelta(minutes=1),
        'S': timedelta(seconds=1),
        's': timedelta(seconds=1),
    }

    alias = 'H' if freq is None else str(freq).strip()
    # Split an optional leading integer ("15" in "15min") from the unit.
    unit = alias.lstrip('0123456789')
    multiplier = int(alias[:len(alias) - len(unit)] or 1)
    if unit not in unit_steps or multiplier < 1:
        raise ValueError(
            f"Unsupported freq {freq!r}; expected an optional positive integer "
            f"followed by one of {sorted(unit_steps)}"
        )

    step = unit_steps[unit] * multiplier
    base = datetime.strptime(start_date, '%Y-%m-%d')
    return [base + i * step for i in range(num_samples)]

def generate_data(num_samples, seed=None):
    """Build a synthetic frame of uniformly distributed placeholder values.

    A ``timestamp`` column is produced by
    ``generate_timestamps('2021-01-01', num_samples)`` and every other column
    is drawn independently from a uniform distribution over the range listed
    in ``column_ranges``. The assembled frame is then handed to
    ``create_time_features`` (imported from ``data_preparation``; what it adds
    or overwrites is not visible from this file).

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed for reproducible output. When ``None`` (default)
            values are drawn from NumPy's global RNG exactly as before, so an
            earlier ``np.random.seed(...)`` call is still honoured. Otherwise
            a dedicated ``np.random.default_rng(seed)`` is used and the global
            RNG state is left untouched.

    Returns:
        The result of ``create_time_features`` applied to the generated
        ``pandas.DataFrame``.
    """
    # The np.random module and a Generator both expose ``uniform(low, high,
    # size)``, so either can serve as the source of randomness below.
    rng = np.random if seed is None else np.random.default_rng(seed)

    timestamps = generate_timestamps('2021-01-01', num_samples)

    # (column name, low, high). The list order is both the draw order and the
    # column order, so it must stay stable for seeded runs to be comparable.
    # NOTE(review): several ranges look copy-pasted from generate_wind_data
    # (e.g. solar generation spans -180..180 and can be negative, lat is 0..25,
    # the calendar columns are continuous floats in 0..25 unrelated to
    # ``timestamp``) -- confirm whether these placeholders are intended.
    column_ranges = [
        ('DE_wind_generation_actual', 0, 60),
        ('DE_solar_generation_actual', -180, 180),
        ('cumulated hours', 0, 100),
        ('lat', 0, 25),
        ('lon', -40, 50),
        ('v1', 0, 100),
        ('v2', 0, 100),
        ('v_50m', 0, 25),
        ('h1', 0, 25),
        ('h2', 0, 25),
        ('z0', 0, 25),
        ('SWTDN', 0, 25),
        ('SWGDN', 0, 25),
        ('T', 0, 25),
        ('rho', 0, 25),
        ('p', 0, 25),
        ('hour', 0, 25),
        ('day_of_week', 0, 25),
        ('month', 0, 25),
        ('day_of_year', 0, 25),
    ]

    columns = {'timestamp': timestamps}
    for name, low, high in column_ranges:
        columns[name] = rng.uniform(low, high, num_samples)

    data = pd.DataFrame(columns)
    return create_time_features(data)

def generate_wind_data(num_samples, seed=None):
    """Build a synthetic wind frame of uniformly distributed placeholder values.

    A ``timestamp`` column is produced by
    ``generate_timestamps('2021-01-01', num_samples)`` and every other column
    is drawn independently from a uniform distribution over the range listed
    in ``column_ranges``. The assembled frame is then handed to
    ``create_time_features`` (imported from ``data_preparation``; its effect
    is not visible from this file).

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed for reproducible output. When ``None`` (default)
            values are drawn from NumPy's global RNG exactly as before, so an
            earlier ``np.random.seed(...)`` call is still honoured. Otherwise
            a dedicated ``np.random.default_rng(seed)`` is used and the global
            RNG state is left untouched.

    Returns:
        The result of ``create_time_features`` applied to the generated
        ``pandas.DataFrame``.
    """
    # The np.random module and a Generator both expose ``uniform(low, high,
    # size)``, so either can serve as the source of randomness below.
    rng = np.random if seed is None else np.random.default_rng(seed)

    timestamps = generate_timestamps('2021-01-01', num_samples)

    # (column name, low, high). The list order is both the draw order and the
    # column order, so it must stay stable for seeded runs to be comparable.
    column_ranges = [
        ('latitude', 0, 60),
        ('longitude', -180, 180),
        ('production', 0, 100),
        ('wind_speed', 0, 25),
        ('temperature', -40, 50),
        ('humidity', 0, 100),
        ('precipitation', 0, 100),
        ('wind_speed_weather_forecast', 0, 25),
    ]

    columns = {'timestamp': timestamps}
    for name, low, high in column_ranges:
        columns[name] = rng.uniform(low, high, num_samples)

    wind_data = pd.DataFrame(columns)
    return create_time_features(wind_data)

def generate_solar_data(num_samples, seed=None):
    """Build a synthetic solar frame of uniformly distributed placeholder values.

    A ``timestamp`` column is produced by
    ``generate_timestamps('2021-01-01', num_samples)`` and every other column
    is drawn independently from a uniform distribution over the range listed
    in ``column_ranges``. The assembled frame is then handed to
    ``create_time_features`` (imported from ``data_preparation``; its effect
    is not visible from this file).

    Args:
        num_samples: Number of rows to generate.
        seed: Optional seed for reproducible output. When ``None`` (default)
            values are drawn from NumPy's global RNG exactly as before, so an
            earlier ``np.random.seed(...)`` call is still honoured. Otherwise
            a dedicated ``np.random.default_rng(seed)`` is used and the global
            RNG state is left untouched.

    Returns:
        The result of ``create_time_features`` applied to the generated
        ``pandas.DataFrame``.
    """
    # The np.random module and a Generator both expose
    # ``uniform(low=, high=, size=)``, so either can be the randomness source.
    rng = np.random if seed is None else np.random.default_rng(seed)

    timestamps = generate_timestamps('2021-01-01', num_samples)

    # (column name, low, high). The list order is both the draw order and the
    # column order, so it must stay stable for seeded runs to be comparable.
    column_ranges = [
        ('latitude', -90, 90),
        ('longitude', -180, 180),
        ('altitude', 0, 3000),
        ('production', 0, 100),
        ('solar_irradiance', 0, 1000),
        ('temperature', -20, 40),
        ('humidity', 0, 100),
        ('precipitation', 0, 100),
    ]

    columns = {'timestamp': timestamps}
    for name, low, high in column_ranges:
        columns[name] = rng.uniform(low=low, high=high, size=num_samples)

    solar_data = pd.DataFrame(columns)
    return create_time_features(solar_data)


# Seed NumPy's global RNG so a fresh "Restart Kernel -> Run All" regenerates
# the same synthetic data set instead of a different CSV on every run.
SEED = 42
np.random.seed(SEED)

num_samples = 1000
data = generate_data(num_samples)

# index=False keeps the positional row index out of the CSV, so reading the
# file back does not yield an extra "Unnamed: 0" column.
data.to_csv(create_absolute_path('solar_wind_data.csv'), index=False)

In [3]:
# Preview the first rows of `data` as a quick sanity check.
# NOTE(review): the saved output below shows an "Unnamed: 0" column and
# 1970-epoch timestamps, which the generation cell above does not appear to
# produce (it starts at 2021-01-01 and writes the CSV with index=False) --
# the output looks stale; re-run on a fresh kernel to confirm.
data.head()

Unnamed: 0,timestamp,DE_wind_generation_actual,DE_solar_generation_actual,cumulated hours,lat,lon,v1,v2,v_50m,h1,...,z0,SWTDN,SWGDN,T,rho,p,hour,day_of_week,month,day_of_year
0,1970-01-01 00:00:00.000000000,14.961126,141.095487,23.8114,18.162091,-7.704158,23.709989,83.982759,14.602475,18.685433,...,6.613097,8.152771,5.539393,6.410016,0.747836,9.088553,0,9.692575,1,1
1,1970-01-01 00:00:00.000000001,5.418316,123.968548,32.392952,21.794286,-3.605879,35.438232,14.271031,19.200782,16.262991,...,22.431919,5.678648,5.06798,23.533398,19.442966,10.124543,0,1.22017,1,1
2,1970-01-01 00:00:00.000000002,56.434291,-126.728352,49.64142,0.997103,20.225008,0.254227,59.922078,9.639403,22.002166,...,18.372101,24.174617,23.922198,15.807659,24.199549,15.856314,0,17.910017,1,1
3,1970-01-01 00:00:00.000000003,49.72603,-129.643737,2.85416,10.901074,8.332397,8.807283,22.830077,18.21064,19.182298,...,13.29159,6.931119,10.129242,23.894282,8.27666,7.328639,0,24.323746,1,1
4,1970-01-01 00:00:00.000000004,0.310268,176.90491,64.831332,23.236574,30.824183,48.276774,77.514081,4.169827,1.880394,...,21.407928,4.989943,20.734489,21.511755,20.802086,13.996108,0,14.202988,1,1
