In [1]:
import pandas as pd
import numpy as np

In [9]:
# Read the pre-processed wind observations (location, timestamp, wind speed,
# wind direction) into the notebook-level frame and display it.
df = pd.read_csv(
    filepath_or_buffer='../input/wind/processed_sfo_santacruz_20_years.csv',
)
df

Unnamed: 0,locationname,datetime,windspeed,winddir
0,SFO,2004-01-01 00:00:00,12.3,156.0
1,SFO,2004-01-01 01:00:00,12.3,153.0
2,SFO,2004-01-01 02:00:00,11.6,136.0
3,SFO,2004-01-01 03:00:00,18.9,167.0
4,SFO,2004-01-01 04:00:00,14.6,154.0
...,...,...,...,...
1394862,Watsonville,2023-12-31 19:00:00,0.2,354.0
1394863,Watsonville,2023-12-31 20:00:00,3.1,331.0
1394864,Watsonville,2023-12-31 21:00:00,3.3,343.0
1394865,Watsonville,2023-12-31 22:00:00,4.3,341.0


In [10]:
def round_to_nearest_45(n):
    """Snap ``n`` to the closest multiple of 45.

    Uses the built-in ``round``, so exact halves follow its
    round-half-to-even rule (22.5 -> 0, 67.5 -> 90).
    """
    step = 45
    multiples = round(n / step)
    return multiples * step

def convert_to_int(n):
    """Return ``int(n)``; for floats this truncates toward zero."""
    as_integer = int(n)
    return as_integer

In [11]:
# Parse the timestamp column and derive the hour of day used for the
# per-hour percentile calculations.
df['datetime'] = pd.to_datetime(df['datetime'])
df['hour'] = df['datetime'].dt.hour
# Replace missing wind directions with the column's arithmetic mean.
# NOTE(review): an arithmetic mean of compass bearings is not a circular
# mean (350 and 10 average to 180) - confirm this imputation is intended.
df['winddir'] = df['winddir'].where(df['winddir'].notna(), df['winddir'].mean())


def calculate_hourly_percentiles(data, percentile):
    """Compute one wind-speed and one wind-direction percentile per hour of day.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain the columns 'hour', 'windspeed' and 'winddir'.
    percentile : float
        Percentile to compute, forwarded to ``numpy.percentile`` (0-100).

    Returns
    -------
    pandas.DataFrame
        Exactly 24 rows (hours 0..23) with columns 'hour', 'windspeed'
        (truncated to an integer) and 'winddir' (rounded to the nearest
        multiple of 45). Hours with no usable observations yield 0 for
        both values.
    """
    def _percentile_ignoring_nan(series):
        # np.percentile propagates NaN, so a single missing reading used to
        # turn the whole hour's percentile into NaN (and then into 0).
        # Drop missing values first and guard the empty case explicitly.
        valid = series.dropna()
        if len(valid) == 0:
            return np.nan
        return np.percentile(valid, percentile)

    records = []
    for hour in range(24):
        hourly_data = data[data['hour'] == hour]
        records.append({
            'hour': hour,
            'windspeed': _percentile_ignoring_nan(hourly_data['windspeed']),
            'winddir': _percentile_ignoring_nan(hourly_data['winddir']),
        })

    # Named `hourly` rather than `df` so it does not shadow the notebook-level frame.
    hourly = pd.DataFrame(records)
    # Hours without data fall back to 0, as before.
    hourly['windspeed'] = hourly['windspeed'].fillna(0).astype(int)
    # Snap directions to the nearest multiple of 45 (round-half-to-even,
    # the same rule as the built-in round()).
    hourly['winddir'] = ((hourly['winddir'].fillna(0) / 45).round() * 45).astype(int)
    return hourly

# hourly_percentiles = calculate_hourly_percentiles(df, 99)
# hourly_percentiles

In [12]:
import sqlite3
# Select the flight metrics together with the wind direction/magnitude columns.
query_final = '''
SELECT flight_distance, flight_time, energy_consumption, wind_direction_degrees, wind_magnitude_mph
FROM flight_metrics
'''

conn = sqlite3.connect('../input/wind/icrat.sqlite')

# Let a failed query raise here: swallowing it and setting the frame to None
# only deferred the failure to an unrelated AttributeError on the next line.
# The connection is closed whether or not the query succeeds.
try:
    df_flight_metrics = pd.read_sql_query(query_final, conn)
finally:
    conn.close()
message_final = "Data loaded successfully from the SQLite database."

# Average duplicate (distance, direction, magnitude) rows. groupby sorts the
# result by its keys, so no explicit sort is needed beforehand.
df_flight_metrics = (
    df_flight_metrics
    .groupby(['flight_distance',
              'wind_direction_degrees',
              'wind_magnitude_mph'])
    .mean()
    .reset_index()
)

df_flight_metrics

Unnamed: 0,flight_distance,wind_direction_degrees,wind_magnitude_mph,flight_time,energy_consumption
0,20,0,0,10.096742,21.779902
1,20,0,1,10.075754,21.746685
2,20,0,2,10.054998,21.715017
3,20,0,3,10.034471,21.684900
4,20,0,4,10.014169,21.656342
...,...,...,...,...,...
1714,60,360,36,20.382043,40.054396
1715,60,360,37,20.316793,40.001717
1716,60,360,38,20.252147,39.952310
1717,60,360,39,20.188095,39.906229


In [13]:
from scipy.spatial.distance import cdist

def match_and_add_flight_metrics(percentile_df, flight_metrics_df):
    """Attach the closest flight-metrics row to every row of ``percentile_df``.

    For each (windspeed, winddir) pair the row of ``flight_metrics_df`` whose
    (wind_magnitude_mph, wind_direction_degrees) is nearest under ``cdist``'s
    default metric is selected by position, and its 'flight_distance',
    'flight_time' and 'energy_consumption' values are appended as new
    columns. A 'flight_direction' column set to 'A-B' is added as well.

    The matched columns are joined with ``pd.concat(axis=1)``, i.e. aligned
    on ``percentile_df``'s index against a fresh 0..n-1 index.
    """
    metric_columns = ['flight_distance', 'flight_time', 'energy_consumption']

    def _nearest_metrics(speed, direction):
        # Distance from this one wind condition to every candidate row.
        candidates = flight_metrics_df[['wind_magnitude_mph', 'wind_direction_degrees']].values
        nearest_position = cdist([[speed, direction]], candidates).argmin()
        return flight_metrics_df.iloc[nearest_position][metric_columns]

    matched_rows = [
        _nearest_metrics(record['windspeed'], record['winddir'])
        for _, record in percentile_df.iterrows()
    ]

    matched_df = pd.DataFrame(matched_rows).reset_index(drop=True)
    matched_df['flight_direction'] = 'A-B'

    return pd.concat([percentile_df, matched_df], axis=1)

def process_and_match_for_B_A(percentile_df, flight_metrics_df):
    """Match flight metrics for the reverse (B-A) leg.

    The wind direction is rotated by 180 degrees before delegating to
    ``match_and_add_flight_metrics``.

    Parameters
    ----------
    percentile_df : pandas.DataFrame
        Hourly percentile table with a 'winddir' column in degrees.
        It is not modified.
    flight_metrics_df : pandas.DataFrame
        Lookup table forwarded unchanged to ``match_and_add_flight_metrics``.

    Returns
    -------
    pandas.DataFrame
        Whatever ``match_and_add_flight_metrics`` returns for the rotated
        directions. That helper labels rows 'A-B'; relabelling to 'B-A' is
        left to the caller.
    """
    # Work on a copy so the caller's frame keeps its original directions.
    reversed_df = percentile_df.copy()

    # Rotate by 180 degrees and wrap into [0, 360). The earlier
    # abs(winddir - 180) mirrored bearings below 180 instead of reversing
    # them (90 stayed 90 rather than becoming 270).
    reversed_df['winddir'] = (reversed_df['winddir'] + 180) % 360

    return match_and_add_flight_metrics(reversed_df, flight_metrics_df)


# Specified Percentiles
percentiles = [50, 60, 70, 80, 90, 95, 99, 99.5]

# The hourly percentile table depends only on the percentile, not on the
# flight distance, so build each one once instead of once per distance.
hourly_percentiles_by_p = {p: calculate_hourly_percentiles(df, p) for p in percentiles}

all_results = []  # List to store all result DataFrames

for flight_distance, group_df in df_flight_metrics.groupby('flight_distance'):
    for p in percentiles:
        percentile_df = hourly_percentiles_by_p[p]

        # A-B uses the wind direction as observed; B-A gets its own copy
        # because the cached table is reused across flight distances.
        ab_results = match_and_add_flight_metrics(percentile_df, group_df)
        ba_results = process_and_match_for_B_A(percentile_df.copy(), group_df)

        # Tag both legs with the run they belong to.
        for direction, results in (('A-B', ab_results), ('B-A', ba_results)):
            results['flight_direction'] = direction
            results['percentile'] = p
            results['flight_distance'] = flight_distance

        # Combine A-B and B-A results (A-B rows first)
        combined_results = pd.concat([ab_results, ba_results])
        all_results.append(combined_results)

# Combine all results into a single DataFrame
final_results_df = pd.concat(all_results).reset_index(drop=True)


In [29]:
# Display the combined A-B / B-A results table built in the previous cell.
final_results_df

Unnamed: 0,hour,windspeed,winddir,flight_distance,flight_time,energy_consumption,flight_direction,percentile
0,0,4,270,20,10.097690,21.781382,A-B,50.0
1,1,3,270,20,10.097275,21.780733,A-B,50.0
2,2,3,270,20,10.097275,21.780733,A-B,50.0
3,3,3,270,20,10.097275,21.780733,A-B,50.0
4,4,3,225,20,10.142328,21.855943,A-B,50.0
...,...,...,...,...,...,...,...,...
1915,19,28,180,60,26.363949,49.582326,B-A,99.5
1916,20,26,180,60,26.100918,49.085175,B-A,99.5
1917,21,25,180,60,25.971976,48.842442,B-A,99.5
1918,22,24,180,60,25.844706,48.603530,B-A,99.5


In [16]:
# Split the combined results by travel direction; row order is preserved.
ab = final_results_df.loc[final_results_df['flight_direction'].eq('A-B')]
ba = final_results_df.loc[final_results_df['flight_direction'].eq('B-A')]

In [31]:
# View the A-B rows as 40 runs x 24 hourly rows and take each run's first
# row as its percentile / distance identifier.
percentile_id = np.reshape(ab['percentile'].to_numpy(), (40, 24))[:, 0]
distance_id = np.reshape(ab['flight_distance'].to_numpy(), (40, 24))[:, 0]

In [32]:
# Reshape each metric into a (40, 24) array: one row per run, one column
# per hourly row, separately for the A-B and B-A legs.
ab_flight_time, ba_flight_time = (
    np.reshape(leg['flight_time'].to_numpy(), (40, 24)) for leg in (ab, ba)
)

ab_energy_consumption, ba_energy_consumption = (
    np.reshape(leg['energy_consumption'].to_numpy(), (40, 24)) for leg in (ab, ba)
)

In [77]:
# Build one parameter set per run, keyed "dist_<distance>_per_<percentile*10>".
params = {}

for i in range(len(ab_flight_time)):
    # Arrays of shape (289, 2, 2): index 0 on the first axis stays zero and
    # indices 1..288 hold each of the 24 hourly values repeated 12 times.
    # Entry [t, 0, 1] carries the A-B value, [t, 1, 0] the B-A value.
    flight_time = np.zeros((289, 2, 2))
    energy_consumption = np.zeros((289, 2, 2))

    flight_time[1:, 0, 1] = np.repeat(ab_flight_time[i], 12)
    flight_time[1:, 1, 0] = np.repeat(ba_flight_time[i], 12)

    energy_consumption[1:, 0, 1] = np.repeat(ab_energy_consumption[i], 12)
    energy_consumption[1:, 1, 0] = np.repeat(ba_energy_consumption[i], 12)

    run_id = f"dist_{int(distance_id[i])}_per_{int(percentile_id[i]*10)}"
    params.setdefault(run_id, {}).update(
        flight_time=flight_time,
        energy_consumption=energy_consumption,
    )


dist_20_per_500
dist_20_per_600
dist_20_per_700
dist_20_per_800
dist_20_per_900
dist_20_per_950
dist_20_per_990
dist_20_per_995
dist_30_per_500
dist_30_per_600
dist_30_per_700
dist_30_per_800
dist_30_per_900
dist_30_per_950
dist_30_per_990
dist_30_per_995
dist_40_per_500
dist_40_per_600
dist_40_per_700
dist_40_per_800
dist_40_per_900
dist_40_per_950
dist_40_per_990
dist_40_per_995
dist_50_per_500
dist_50_per_600
dist_50_per_700
dist_50_per_800
dist_50_per_900
dist_50_per_950
dist_50_per_990
dist_50_per_995
dist_60_per_500
dist_60_per_600
dist_60_per_700
dist_60_per_800
dist_60_per_900
dist_60_per_950
dist_60_per_990
dist_60_per_995


In [None]:
# 50 percent 80 percent

In [81]:
import pickle
# Persist the per-run parameter dictionary as a pickle file.
with open('../input/wind/wind_params.pkl', mode='wb') as f:
    pickle.dump(params, f)

In [82]:
# Collect (run_id, params) pairs for every distance/percentile combination
# where neither the distance is in `distance` nor the percentile key is in
# `percentile`.
# NOTE(review): `distance` and `percentile` are not defined in any cell shown
# in this notebook - this cell raises NameError on a fresh kernel unless they
# are defined elsewhere. Confirm where they come from.
# NOTE(review): every tuple carries the whole `params` dict rather than
# params[run_id] - confirm that is intended.
valid_runs = []
for i in range(20, 70, 10):
    for j in (500, 600, 700, 800, 900, 950, 990, 995):
        if not (i in distance or j in percentile):
            valid_runs.append((f'dist_{i}_per_{j}', params))


In [85]:
import os 

In [87]:
# List the fleet-operation result directory and keep the fleet-size files.
file_list = os.listdir('../output/ICRAT_wind/fleet_op_result')
all_files = [name for name in file_list if name.endswith('_fleetsize.txt')]

# One row per file holding the first two underscore-separated tokens of its
# name. The array is built in a single step: seeding np.vstack with
# np.empty((n, 2)) left n uninitialised rows ahead of the real ones.
# reshape(-1, 2) keeps the (0, 2) shape when no file matches.
file_names = np.array([name.split('_')[:2] for name in all_files]).reshape(-1, 2)

In [89]:
# Show the raw directory listing captured by os.listdir in the previous cell.
file_list

[]