In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import skew, kurtosis

import glob
import os
import plotly.express as px
import json

# --------------------------------------------------------------------------------------------

In [2]:
folder_path_plant1 = 'D:/Py_Renewable/NK_Wind/Offshore_nc/Dataset/csv/plant_1'
folder_path_comp = 'D:/Py_Renewable/NK_Wind/Offshore_nc/Dataset/csv/comparison_grid'

file_list_plant = glob.glob(os.path.join(folder_path_plant1, '*.csv')) # 100m
file_list_comp = glob.glob(os.path.join(folder_path_comp, '*.csv')) # 100m

In [3]:
dataframes_plant = {}
dataframes_comp = {}

In [4]:
for file_path in file_list_plant:
    # Get base file name without extension
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    
    # Read CSV into a dataframe
    df = pd.read_csv(file_path)
    
    # Store in dictionary with file name as key
    dataframes_plant[file_name] = df

In [5]:
for file_path in file_list_comp:
    # Get base file name without extension
    file_name = os.path.splitext(os.path.basename(file_path))[0]
    
    # Read CSV into a dataframe
    df = pd.read_csv(file_path)
    
    # Store in dictionary with file name as key
    dataframes_comp[file_name] = df

In [6]:
for name, df in dataframes_plant.items():
    if 'Unnamed: 0' in df.columns:
        df.drop(columns='Unnamed: 0', inplace=True)

for name, df in dataframes_comp.items():
    if 'Unnamed: 0' in df.columns:
        df.drop(columns='Unnamed: 0', inplace=True)


In [7]:
dataframes_plant

{'wind_data_37,123.25':      wind_speed  wind_direction
 0      3.168403      153.067798
 1      3.713204      177.220542
 2      7.009467      144.435138
 3      9.287383      230.947424
 4      4.775892      137.090441
 ..          ...             ...
 361   10.759671      147.411049
 362   11.115592      140.385546
 363    7.866152      201.891272
 364    7.899241      185.738493
 365    7.498455      123.333943
 
 [366 rows x 2 columns],
 'wind_data_37,123.5':      wind_speed  wind_direction
 0      3.089627      154.347233
 1      4.037787      175.246603
 2      7.286200      144.540729
 3      9.108219      229.522130
 4      4.815958      142.051409
 ..          ...             ...
 361   10.661368      143.980759
 362   11.286695      137.038234
 363    7.884811      199.788838
 364    8.335396      187.145111
 365    7.866362      122.815945
 
 [366 rows x 2 columns],
 'wind_data_37,123.75':      wind_speed  wind_direction
 0      3.008814      155.847554
 1      4.210420    

# --------------------------------------------------------------------------------------------

In [8]:
summary_stats = []

# Plant data - use original name
for name, df in dataframes_plant.items():
    stats = {
        'name': name,
        'mean': df['wind_speed'].mean(),
        'std': df['wind_speed'].std(),
        'min': df['wind_speed'].min(),
        'max': df['wind_speed'].max(),
        'kurtosis': df['wind_speed'].kurtosis(),
        'skewness': df['wind_speed'].skew(),
        'median': df['wind_speed'].median(),
    }
    summary_stats.append(stats)

# Comp data - add "_C" to name before putting in stats
for name, df in dataframes_comp.items():
    name_comp = f"{name}_C"
    stats = {
        'name': name_comp,
        'mean': df['wind_speed'].mean(),
        'std': df['wind_speed'].std(),
        'min': df['wind_speed'].min(),
        'max': df['wind_speed'].max(),
        'kurtosis': df['wind_speed'].kurtosis(),
        'skewness': df['wind_speed'].skew(),
        'median': df['wind_speed'].median(),
    }
    summary_stats.append(stats)

# Create DataFrame
summary_df = pd.DataFrame(summary_stats)

# Optional: set 'name' as index
summary_df.set_index('name', inplace=True)

# --------------------------------------------------------------------------------------------

In [9]:
summary_stats_D = []

# Plant data - use original name
for name, df in dataframes_plant.items():
    stats = {
        'name': name,
        'mean': df['wind_direction'].mean(),
        'std': df['wind_direction'].std(),
        'min': df['wind_direction'].min(),
        'max': df['wind_direction'].max(),
        'kurtosis': df['wind_direction'].kurtosis(),
        'skewness': df['wind_direction'].skew(),
        'median': df['wind_direction'].median(),
    }
    summary_stats_D.append(stats)

# Comp data - add "_C" to name before putting in stats
for name, df in dataframes_comp.items():
    name_comp = f"{name}_C"
    stats = {
        'name': name_comp,
        'mean': df['wind_direction'].mean(),
        'std': df['wind_direction'].std(),
        'min': df['wind_direction'].min(),
        'max': df['wind_direction'].max(),
        'kurtosis': df['wind_direction'].kurtosis(),
        'skewness': df['wind_direction'].skew(),
        'median': df['wind_direction'].median(),
    }
    summary_stats_D.append(stats)

# Create DataFrame
summary_dfD = pd.DataFrame(summary_stats_D)

# Optional: set 'name' as index
summary_dfD.set_index('name', inplace=True)

In [10]:
summary_dfD

Unnamed: 0_level_0,mean,std,min,max,kurtosis,skewness,median
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
"wind_data_37,123.25",169.657833,87.487039,1.018153,359.141727,-1.019787,0.128803,156.920582
"wind_data_37,123.5",170.380842,87.445642,0.041965,359.297471,-1.031438,0.136488,156.405428
"wind_data_37,123.75",173.114266,87.570914,5.457021,359.319579,-1.021327,0.182315,158.290910
"wind_data_37,123",169.729903,87.222205,0.443705,359.847995,-1.040641,0.124845,155.886838
"wind_data_37,124.25",171.074187,87.201151,1.275257,359.318536,-1.017932,0.173177,159.567994
...,...,...,...,...,...,...,...
"wind_data_37.75,124.75_C",171.999392,89.493179,4.724406,357.634417,-1.000599,0.203155,150.999010
"wind_data_37.75,125.25_C",172.718389,90.740905,6.099260,356.965459,-0.972108,0.242383,149.815847
"wind_data_37.75,125.75_C",173.921412,91.178404,1.711163,356.144521,-0.850663,0.136668,161.351774
"wind_data_37.75,125_C",171.767156,89.687022,0.607295,353.258311,-1.016736,0.234257,147.465203


In [11]:
combined_wind_speed = pd.DataFrame()

for name, df in dataframes_plant.items():
    # Make sure the index is aligned to day number (if needed)
    # You can adjust this depending on how your index is set
    wind_series = df['wind_speed'].reset_index(drop=True)
    
    # Assign to new DataFrame with the region name as the column
    combined_wind_speed[name] = wind_series

    
for name, df in dataframes_comp.items():
    name_comp = f"{name}_C"
    # Make sure the index is aligned to day number (if needed)
    # You can adjust this depending on how your index is set
    wind_series = df['wind_speed'].reset_index(drop=True)
    
    # Assign to new DataFrame with the region name as the column
    combined_wind_speed[name_comp] = wind_series

  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wind_speed[name_comp] = wind_series
  combined_wi

In [12]:
combined_wind_Direction = pd.DataFrame()

for name, df in dataframes_plant.items():
    # Make sure the index is aligned to day number (if needed)
    # You can adjust this depending on how your index is set
    wind_series = df['wind_direction'].reset_index(drop=True)
    
    # Assign to new DataFrame with the region name as the column
    combined_wind_Direction[name] = wind_series

    
for name, df in dataframes_comp.items():
    name_comp = f"{name}_C"
    # Make sure the index is aligned to day number (if needed)
    # You can adjust this depending on how your index is set
    wind_series = df['wind_direction'].reset_index(drop=True)
    
    # Assign to new DataFrame with the region name as the column
    combined_wind_Direction[name_comp] = wind_series

  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_comp] = wind_series
  combined_wind_Direction[name_

In [13]:
os.makedirs('output', exist_ok=True)
combined_wind_Direction.to_csv('output/combined_wind_Direction.csv')
summary_dfD.to_csv('output/summary_winddirection.csv')
combined_wind_speed.to_csv('output/combined_wind_speed.csv')
summary_df.to_csv('output/summary_windspeed.csv')

In [14]:
# Load your CSV
summary_df = pd.read_csv("output/summary_windspeed.csv")

# Extract lat/lon
summary_df[['lat', 'lon']] = summary_df['name'].str.extract(r'wind_data_(\d+\.?\d*),(\d+\.?\d*)').astype(float)

# Plot
fig = px.scatter_mapbox(
    summary_df,
    lat='lat',
    lon='lon',
    color='mean',
    size='mean',
    hover_name='name',
    hover_data=['mean', 'std', 'median'],
    color_continuous_scale='Viridis',
    size_max=15,
    zoom=5,
    height=600
)

fig.update_layout(mapbox_style='open-street-map')
fig.update_layout(title='Wind Speed Summary by Location based on ERA5 Dataset (Wind Speed above 100m)')
fig.show()

# --------------------------------------------------------------------------------------------

In [15]:
stats_df = pd.read_csv("output/combined_wind_Direction.csv", header=None)
stats_df.drop(columns=[0], inplace=True)
stats_df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,228,229,230,231,232,233,234,235,236,237
0,"wind_data_37,123.25","wind_data_37,123.5","wind_data_37,123.75","wind_data_37,123","wind_data_37,124.25","wind_data_37,124.5","wind_data_37,124","wind_data_37.25,123.25","wind_data_37.25,123.5","wind_data_37.25,123.75",...,"wind_data_37.5,125.5_C","wind_data_37.5,125.75_C","wind_data_37.5,125_C","wind_data_37.5,126.25_C","wind_data_37.5,126_C","wind_data_37.75,124.75_C","wind_data_37.75,125.25_C","wind_data_37.75,125.75_C","wind_data_37.75,125_C","wind_data_37.75,126_C"
1,153.067797973348,154.34723274819,155.847554199476,150.997964136843,161.429679056635,163.465776422,158.19009995501,155.701202926761,158.193394291967,159.91588489217,...,175.570130598145,171.238779954301,177.78499226675,229.951136982671,172.831733809233,176.109334405948,183.403539131321,194.782039897122,179.705445991936,215.35273243373
2,177.220542356869,175.246602907368,173.952099584119,179.679054222394,174.8677326558,175.469120845429,173.796369119535,180.525653249496,178.974513156077,177.485552156826,...,171.761848633012,168.447820388332,178.219427384931,157.626820676223,164.419298346518,180.847040898428,177.844655135805,179.634405599046,179.089655489626,179.631371251509
3,144.435138082896,144.540729039475,144.125666753174,143.802299550019,143.092081704722,141.972227705034,144.070831030283,146.953899148664,146.470003328583,145.708101588999,...,137.484471747961,138.336713175702,137.795148758784,137.520836040001,138.113351838241,137.007962107102,132.093312202632,133.053907433592,134.13776346518,132.096774901046
4,230.94742367359,229.52213011067,228.269592022991,233.239559382675,226.859197424628,226.289448851973,227.397040014225,232.512402791112,230.331625850966,228.824769059941,...,224.909292078162,225.096907918535,227.845440979999,247.261709974082,230.771860317079,229.524277822966,230.294553137301,237.814995730686,229.938160451919,247.436611288042
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,147.411049186678,143.980759272896,140.330417704246,150.565114681734,133.940516983257,131.74460942482,136.791219790999,143.812164895888,139.830130884539,135.816274907767,...,118.998446815221,118.52644996875,125.042788306202,123.718909216395,120.961949816701,126.894346887722,120.5981019552,119.388369736109,124.640955537031,123.34211748487
363,140.385545895755,137.038233885156,134.606486548842,143.864758118902,131.698351493698,131.255718515915,132.727971227682,138.433436221116,135.916737034154,134.046495808779,...,122.771576392296,119.921455771627,131.072748310102,122.890529266558,120.124311379574,132.980105801019,125.041311144348,119.799181491077,130.211439183386,121.775637853324
364,201.891272424654,199.788838192346,197.229275889916,205.055555563367,192.186584649789,189.221618288715,194.720584721273,204.660716984948,201.385680622212,198.044407151982,...,250.497914842634,271.912194754237,192.811289202859,273.185163208401,272.05781456651,196.359076690214,252.573173986634,279.143228710669,214.17255165629,275.932488683734
365,185.738492593212,187.145111189427,188.815798458789,184.706470937871,192.3610318267,194.154699620467,190.527100794345,183.114127952391,185.001075725848,186.84603767027,...,198.798840795634,201.226563680926,194.339439069244,202.769603485883,202.483493589435,190.915879682827,194.077357821987,200.148701826357,192.453470580882,203.32577301506


In [16]:
output_dir_P = os.path.join('output', 'plot_histo_dir', 'Plant')
output_dir_C = os.path.join('output', 'plot_histo_dir', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_df.columns:
    col_data = stats_df[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the histogram using Seaborn
    plt.figure(figsize=(8, 6))
    sns.histplot(data, bins=30, edgecolor='black')
    plt.title(str(title))
    plt.xlabel('Direction')
    plt.ylabel('Frequency')
    
    # Build filename from the title (replace spaces with underscores)
    filename = f"histogram_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)
    
    # Save the histogram to the chosen directory
    plt.savefig(filepath, dpi=300)
    plt.close()
    
    print(f"Saved histogram for '{title}' as {filepath}")

Saved histogram for 'wind_data_37,123.25' as output\plot_histo_dir\Plant\histogram_wind_data_37,123.25.png
Saved histogram for 'wind_data_37,123.5' as output\plot_histo_dir\Plant\histogram_wind_data_37,123.5.png
Saved histogram for 'wind_data_37,123.75' as output\plot_histo_dir\Plant\histogram_wind_data_37,123.75.png
Saved histogram for 'wind_data_37,123' as output\plot_histo_dir\Plant\histogram_wind_data_37,123.png
Saved histogram for 'wind_data_37,124.25' as output\plot_histo_dir\Plant\histogram_wind_data_37,124.25.png
Saved histogram for 'wind_data_37,124.5' as output\plot_histo_dir\Plant\histogram_wind_data_37,124.5.png
Saved histogram for 'wind_data_37,124' as output\plot_histo_dir\Plant\histogram_wind_data_37,124.png
Saved histogram for 'wind_data_37.25,123.25' as output\plot_histo_dir\Plant\histogram_wind_data_37.25,123.25.png
Saved histogram for 'wind_data_37.25,123.5' as output\plot_histo_dir\Plant\histogram_wind_data_37.25,123.5.png
Saved histogram for 'wind_data_37.25,123.75

In [17]:
output_dir_P = os.path.join('output', 'plot_scatter_dir', 'Plant')
output_dir_C = os.path.join('output', 'plot_scatter_dir', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_df.columns:
    col_data = stats_df[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the histogram using Seaborn
    plt.figure(figsize=(100, 10))
    sns.scatterplot(x=range(len(data)), y=data)
    plt.title(str(title))
    plt.xlabel('Timeline')
    plt.ylabel('Direction')

    # Build a filename from the title (replacing spaces with underscores)
    filename = f"scatter_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the appropriate folder
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved scatter plot for '{title}' as {filepath}")

Saved scatter plot for 'wind_data_37,123.25' as output\plot_scatter_dir\Plant\scatter_wind_data_37,123.25.png
Saved scatter plot for 'wind_data_37,123.5' as output\plot_scatter_dir\Plant\scatter_wind_data_37,123.5.png
Saved scatter plot for 'wind_data_37,123.75' as output\plot_scatter_dir\Plant\scatter_wind_data_37,123.75.png
Saved scatter plot for 'wind_data_37,123' as output\plot_scatter_dir\Plant\scatter_wind_data_37,123.png
Saved scatter plot for 'wind_data_37,124.25' as output\plot_scatter_dir\Plant\scatter_wind_data_37,124.25.png
Saved scatter plot for 'wind_data_37,124.5' as output\plot_scatter_dir\Plant\scatter_wind_data_37,124.5.png
Saved scatter plot for 'wind_data_37,124' as output\plot_scatter_dir\Plant\scatter_wind_data_37,124.png
Saved scatter plot for 'wind_data_37.25,123.25' as output\plot_scatter_dir\Plant\scatter_wind_data_37.25,123.25.png
Saved scatter plot for 'wind_data_37.25,123.5' as output\plot_scatter_dir\Plant\scatter_wind_data_37.25,123.5.png
Saved scatter pl

In [18]:
output_dir_P = os.path.join('output', 'plot_line_dir', 'Plant')
output_dir_C = os.path.join('output', 'plot_line_dir', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_df.columns:
    col_data = stats_df[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the histogram using Seaborn
    plt.figure(figsize=(100, 10))
    sns.lineplot(x=range(len(data)), y=data)
    plt.title(str(title))
    plt.xlabel('Timeline')
    plt.ylabel('Direction')

    # Build a filename from the title (replace spaces with underscores)
    filename = f"lineplot_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the chosen folder
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved line plot for '{title}' as {filepath}")

Saved line plot for 'wind_data_37,123.25' as output\plot_line_dir\Plant\lineplot_wind_data_37,123.25.png
Saved line plot for 'wind_data_37,123.5' as output\plot_line_dir\Plant\lineplot_wind_data_37,123.5.png
Saved line plot for 'wind_data_37,123.75' as output\plot_line_dir\Plant\lineplot_wind_data_37,123.75.png
Saved line plot for 'wind_data_37,123' as output\plot_line_dir\Plant\lineplot_wind_data_37,123.png
Saved line plot for 'wind_data_37,124.25' as output\plot_line_dir\Plant\lineplot_wind_data_37,124.25.png
Saved line plot for 'wind_data_37,124.5' as output\plot_line_dir\Plant\lineplot_wind_data_37,124.5.png
Saved line plot for 'wind_data_37,124' as output\plot_line_dir\Plant\lineplot_wind_data_37,124.png
Saved line plot for 'wind_data_37.25,123.25' as output\plot_line_dir\Plant\lineplot_wind_data_37.25,123.25.png
Saved line plot for 'wind_data_37.25,123.5' as output\plot_line_dir\Plant\lineplot_wind_data_37.25,123.5.png
Saved line plot for 'wind_data_37.25,123.75' as output\plot_l

In [19]:
output_dir_P = os.path.join('output', 'plot_boxplot_dir', 'Plant')
output_dir_C = os.path.join('output', 'plot_boxplot_dir', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_df.columns:
    col_data = stats_df[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the boxplot using Seaborn
    plt.figure(figsize=(8, 6))
    sns.boxplot(y=data)
    plt.title(str(title))
    plt.ylabel('Direction')

    # Build a filename from the title (sanitizing spaces)
    filename = f"boxplot_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the target directory
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved boxplot for '{title}' as {filepath}")

Saved boxplot for 'wind_data_37,123.25' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37,123.25.png
Saved boxplot for 'wind_data_37,123.5' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37,123.5.png
Saved boxplot for 'wind_data_37,123.75' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37,123.75.png
Saved boxplot for 'wind_data_37,123' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37,123.png
Saved boxplot for 'wind_data_37,124.25' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37,124.25.png
Saved boxplot for 'wind_data_37,124.5' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37,124.5.png
Saved boxplot for 'wind_data_37,124' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37,124.png
Saved boxplot for 'wind_data_37.25,123.25' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37.25,123.25.png
Saved boxplot for 'wind_data_37.25,123.5' as output\plot_boxplot_dir\Plant\boxplot_wind_data_37.25,123.5.png
Saved boxplot for 'wind_data_37.25,123.75' as output\plot_box

In [20]:
output_dir_P = os.path.join('output', 'plot_violinplot_dir', 'Plant')
output_dir_C = os.path.join('output', 'plot_violinplot_dir', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_df.columns:
    col_data = stats_df[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the violin plot using Seaborn
    plt.figure(figsize=(8, 6))
    sns.violinplot(y=data)
    plt.title(str(title))
    plt.ylabel('Direction')

    # Build a filename from the title (replace spaces with underscores)
    filename = f"violin_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the appropriate folder
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved violin plot for '{title}' as {filepath}")

Saved violin plot for 'wind_data_37,123.25' as output\plot_violinplot_dir\Plant\violin_wind_data_37,123.25.png
Saved violin plot for 'wind_data_37,123.5' as output\plot_violinplot_dir\Plant\violin_wind_data_37,123.5.png
Saved violin plot for 'wind_data_37,123.75' as output\plot_violinplot_dir\Plant\violin_wind_data_37,123.75.png
Saved violin plot for 'wind_data_37,123' as output\plot_violinplot_dir\Plant\violin_wind_data_37,123.png
Saved violin plot for 'wind_data_37,124.25' as output\plot_violinplot_dir\Plant\violin_wind_data_37,124.25.png
Saved violin plot for 'wind_data_37,124.5' as output\plot_violinplot_dir\Plant\violin_wind_data_37,124.5.png
Saved violin plot for 'wind_data_37,124' as output\plot_violinplot_dir\Plant\violin_wind_data_37,124.png
Saved violin plot for 'wind_data_37.25,123.25' as output\plot_violinplot_dir\Plant\violin_wind_data_37.25,123.25.png
Saved violin plot for 'wind_data_37.25,123.5' as output\plot_violinplot_dir\Plant\violin_wind_data_37.25,123.5.png
Saved v

# -------------------------------------------------------------------------------------------

In [21]:
stats_sp = pd.read_csv("output/combined_wind_speed.csv", header=None)
stats_sp.drop(columns=[0], inplace=True)
stats_sp

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,228,229,230,231,232,233,234,235,236,237
0,"wind_data_37,123.25","wind_data_37,123.5","wind_data_37,123.75","wind_data_37,123","wind_data_37,124.25","wind_data_37,124.5","wind_data_37,124","wind_data_37.25,123.25","wind_data_37.25,123.5","wind_data_37.25,123.75",...,"wind_data_37.5,125.5_C","wind_data_37.5,125.75_C","wind_data_37.5,125_C","wind_data_37.5,126.25_C","wind_data_37.5,126_C","wind_data_37.75,124.75_C","wind_data_37.75,125.25_C","wind_data_37.75,125.75_C","wind_data_37.75,125_C","wind_data_37.75,126_C"
1,3.16840316544148,3.08962669840285,3.00881388042148,3.18776251324875,2.76293599919975,2.64062297810883,2.90883201034842,3.02767645559157,3.0035611064472,2.97114301148025,...,1.63764376156097,1.11734663383897,2.48629369140423,0.423348897396396,0.664433938160583,2.54241939605822,1.76171337577345,0.838607081494244,2.30807490026091,0.610721671947498
2,3.71320386003237,4.03778659952965,4.21041952161995,3.20449127429975,4.34221099808535,4.34999314627199,4.33443725322773,3.81812718331803,4.0586067553038,4.20165038828585,...,3.73998202644155,3.56411594948026,3.96025949778345,3.15441635224723,3.43313728814128,4.14483543187176,3.93403090265261,3.55924985707767,4.16468366514084,3.39713830963995
3,7.00946703534648,7.28619974742734,7.47799151327317,6.66120790272626,7.80188875776417,7.65992097415336,7.73688994195919,7.31010383990765,7.48562182787649,7.63041044482135,...,5.17366238751757,4.781574581637,6.2097437255097,3.99418175952993,4.48376088151041,6.0016430266249,4.42016389428738,3.39522189239047,5.40194807946082,3.11319101302879
4,9.28738320013835,9.10821913449906,8.84852444448847,9.40290353828575,8.20694809615192,7.83146358182941,8.52497071784109,9.64551193199101,9.4785698420613,9.20256046599243,...,6.23159279372329,5.54753258660522,7.22588575510119,3.65663599538221,4.52725256125155,7.90131607812261,6.48592509182161,4.89316018293414,7.30269195933349,4.07828925045768
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
362,10.7596705533157,10.6613680019128,10.5104335484193,10.7932250925162,10.2375607150659,10.156820345123,10.4078328074281,10.3041846788139,10.1127283482644,9.99695082478737,...,8.08956513244391,7.45605233046348,9.6783716140746,6.96325101566282,7.2280045147023,9.98285156888752,8.58442484784444,7.04288068862199,9.93228450736266,6.97140270069108
363,11.1155918277641,11.2866950941986,11.4414951590646,10.8858166845298,11.6547829169344,11.6598951830846,11.6384587123049,11.136231563046,11.3220901849176,11.5066930245624,...,8.79729512258936,7.91931598940253,10.9649548040991,7.1678372288607,7.5151038633213,11.0677044564336,8.95987965114386,7.1559047118109,10.7001696137655,6.95559463297122
364,7.86615249825236,7.88481130633706,7.78014577467592,7.70275091086023,7.57362091541532,7.4840518592532,7.68485910022131,8.11689035906764,8.05313821132301,7.88808167665783,...,2.09738484823263,2.17675495516106,4.11012946649271,1.47200627629635,1.81880564276808,4.2902648958575,2.23811049651382,2.23395630628911,2.8231004974873,1.76924673034797
365,7.89924149954007,8.33539606784347,8.69961384329806,7.35283181953731,9.22824629332269,9.46413462064785,9.02014364097902,7.59340273791915,8.0171292173095,8.38329548663432,...,9.13891108358733,9.07167105846376,9.21409880566698,8.62706738457719,9.06174670591525,8.47889078894347,8.45362033834138,8.3972325179864,8.6490935969442,8.30544022944234


In [22]:
output_dir_P = os.path.join('output', 'plot_histo_spd', 'Plant')
output_dir_C = os.path.join('output', 'plot_histo_spd', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_sp.columns:
    col_data = stats_sp[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the histogram using Seaborn
    plt.figure(figsize=(8, 6))
    sns.histplot(data, bins=30, edgecolor='black')
    plt.title(str(title))
    plt.xlabel('Speed (m/s)')
    plt.ylabel('Frequency')
    
    # Build filename from the title (replace spaces with underscores)
    filename = f"histogram_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)
    
    # Save the histogram to the chosen directory
    plt.savefig(filepath, dpi=300)
    plt.close()
    
    print(f"Saved histogram for '{title}' as {filepath}")

Saved histogram for 'wind_data_37,123.25' as output\plot_histo_spd\Plant\histogram_wind_data_37,123.25.png
Saved histogram for 'wind_data_37,123.5' as output\plot_histo_spd\Plant\histogram_wind_data_37,123.5.png
Saved histogram for 'wind_data_37,123.75' as output\plot_histo_spd\Plant\histogram_wind_data_37,123.75.png
Saved histogram for 'wind_data_37,123' as output\plot_histo_spd\Plant\histogram_wind_data_37,123.png
Saved histogram for 'wind_data_37,124.25' as output\plot_histo_spd\Plant\histogram_wind_data_37,124.25.png
Saved histogram for 'wind_data_37,124.5' as output\plot_histo_spd\Plant\histogram_wind_data_37,124.5.png
Saved histogram for 'wind_data_37,124' as output\plot_histo_spd\Plant\histogram_wind_data_37,124.png
Saved histogram for 'wind_data_37.25,123.25' as output\plot_histo_spd\Plant\histogram_wind_data_37.25,123.25.png
Saved histogram for 'wind_data_37.25,123.5' as output\plot_histo_spd\Plant\histogram_wind_data_37.25,123.5.png
Saved histogram for 'wind_data_37.25,123.75

In [23]:
output_dir_P = os.path.join('output', 'plot_scatter_spd', 'Plant')
output_dir_C = os.path.join('output', 'plot_scatter_spd', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_sp.columns:
    col_data = stats_sp[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the histogram using Seaborn
    plt.figure(figsize=(100, 10))
    sns.scatterplot(x=range(len(data)), y=data)
    plt.title(str(title))
    plt.xlabel('Timeline')
    plt.ylabel('Speed(m/s)')

    # Build a filename from the title (replacing spaces with underscores)
    filename = f"scatter_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the appropriate folder
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved scatter plot for '{title}' as {filepath}")

Saved scatter plot for 'wind_data_37,123.25' as output\plot_scatter_spd\Plant\scatter_wind_data_37,123.25.png
Saved scatter plot for 'wind_data_37,123.5' as output\plot_scatter_spd\Plant\scatter_wind_data_37,123.5.png
Saved scatter plot for 'wind_data_37,123.75' as output\plot_scatter_spd\Plant\scatter_wind_data_37,123.75.png
Saved scatter plot for 'wind_data_37,123' as output\plot_scatter_spd\Plant\scatter_wind_data_37,123.png
Saved scatter plot for 'wind_data_37,124.25' as output\plot_scatter_spd\Plant\scatter_wind_data_37,124.25.png
Saved scatter plot for 'wind_data_37,124.5' as output\plot_scatter_spd\Plant\scatter_wind_data_37,124.5.png
Saved scatter plot for 'wind_data_37,124' as output\plot_scatter_spd\Plant\scatter_wind_data_37,124.png
Saved scatter plot for 'wind_data_37.25,123.25' as output\plot_scatter_spd\Plant\scatter_wind_data_37.25,123.25.png
Saved scatter plot for 'wind_data_37.25,123.5' as output\plot_scatter_spd\Plant\scatter_wind_data_37.25,123.5.png
Saved scatter pl

In [24]:
output_dir_P = os.path.join('output', 'plot_line_spd', 'Plant')
output_dir_C = os.path.join('output', 'plot_line_spd', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_sp.columns:
    col_data = stats_sp[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the histogram using Seaborn
    plt.figure(figsize=(100, 10))
    sns.lineplot(x=range(len(data)), y=data)
    plt.title(str(title))
    plt.xlabel('Timeline')
    plt.ylabel('Speed (m/s)')

    # Build a filename from the title (replace spaces with underscores)
    filename = f"lineplot_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the chosen folder
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved line plot for '{title}' as {filepath}")

Saved line plot for 'wind_data_37,123.25' as output\plot_line_spd\Plant\lineplot_wind_data_37,123.25.png
Saved line plot for 'wind_data_37,123.5' as output\plot_line_spd\Plant\lineplot_wind_data_37,123.5.png
Saved line plot for 'wind_data_37,123.75' as output\plot_line_spd\Plant\lineplot_wind_data_37,123.75.png
Saved line plot for 'wind_data_37,123' as output\plot_line_spd\Plant\lineplot_wind_data_37,123.png
Saved line plot for 'wind_data_37,124.25' as output\plot_line_spd\Plant\lineplot_wind_data_37,124.25.png
Saved line plot for 'wind_data_37,124.5' as output\plot_line_spd\Plant\lineplot_wind_data_37,124.5.png
Saved line plot for 'wind_data_37,124' as output\plot_line_spd\Plant\lineplot_wind_data_37,124.png
Saved line plot for 'wind_data_37.25,123.25' as output\plot_line_spd\Plant\lineplot_wind_data_37.25,123.25.png
Saved line plot for 'wind_data_37.25,123.5' as output\plot_line_spd\Plant\lineplot_wind_data_37.25,123.5.png
Saved line plot for 'wind_data_37.25,123.75' as output\plot_l

In [25]:
output_dir_P = os.path.join('output', 'plot_boxplot_spd', 'Plant')
output_dir_C = os.path.join('output', 'plot_boxplot_spd', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_sp.columns:
    col_data = stats_sp[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the boxplot using Seaborn
    plt.figure(figsize=(8, 6))
    sns.boxplot(y=data)
    plt.title(str(title))
    plt.ylabel('Speed(m/s)')

    # Build a filename from the title (sanitizing spaces)
    filename = f"boxplot_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the target directory
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved boxplot for '{title}' as {filepath}")

Saved boxplot for 'wind_data_37,123.25' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37,123.25.png
Saved boxplot for 'wind_data_37,123.5' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37,123.5.png
Saved boxplot for 'wind_data_37,123.75' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37,123.75.png
Saved boxplot for 'wind_data_37,123' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37,123.png
Saved boxplot for 'wind_data_37,124.25' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37,124.25.png
Saved boxplot for 'wind_data_37,124.5' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37,124.5.png
Saved boxplot for 'wind_data_37,124' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37,124.png
Saved boxplot for 'wind_data_37.25,123.25' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37.25,123.25.png
Saved boxplot for 'wind_data_37.25,123.5' as output\plot_boxplot_spd\Plant\boxplot_wind_data_37.25,123.5.png
Saved boxplot for 'wind_data_37.25,123.75' as output\plot_box

In [26]:
output_dir_P = os.path.join('output', 'plot_violinplot_spd', 'Plant')
output_dir_C = os.path.join('output', 'plot_violinplot_spd', 'Comparison')

os.makedirs(output_dir_P, exist_ok=True)
os.makedirs(output_dir_C, exist_ok=True)

# Loop over each column starting from the second column (skip the first column)
for col in stats_sp.columns:
    col_data = stats_sp[col]
    
    # Check that there are at least 2 rows (one for the title and one for data)
    if len(col_data) < 2:
         print(f"Column {col} has only {len(col_data)} rows, skipping.")
         continue

    try:
         title = col_data.iloc[0]
    except IndexError as e:
         print(f"Error accessing title in column {col}: {e}")
         continue

    try:
         # Use all rows from index 1 to the end dynamically
         data = pd.to_numeric(col_data.iloc[1:], errors='coerce').dropna()
    except Exception as e:
         print(f"Error converting data in column {col}: {e}")
         continue

    if data.empty:
         print(f"Column {col} has no valid numeric data after the title, skipping.")
         continue
    
    # Choose target directory based on whether the title ends with '_C'
    target_dir = output_dir_C if str(title).endswith('_C') else output_dir_P
    
    # Create the violin plot using Seaborn
    plt.figure(figsize=(8, 6))
    sns.violinplot(y=data)
    plt.title(str(title))
    plt.ylabel('Speed(m/s)')

    # Build a filename from the title (replace spaces with underscores)
    filename = f"violin_{str(title).replace(' ', '_')}.png"
    filepath = os.path.join(target_dir, filename)

    # Save the plot as a PNG file in the appropriate folder
    plt.savefig(filepath, dpi=300)
    plt.close()

    print(f"Saved violin plot for '{title}' as {filepath}")

Saved violin plot for 'wind_data_37,123.25' as output\plot_violinplot_spd\Plant\violin_wind_data_37,123.25.png
Saved violin plot for 'wind_data_37,123.5' as output\plot_violinplot_spd\Plant\violin_wind_data_37,123.5.png
Saved violin plot for 'wind_data_37,123.75' as output\plot_violinplot_spd\Plant\violin_wind_data_37,123.75.png
Saved violin plot for 'wind_data_37,123' as output\plot_violinplot_spd\Plant\violin_wind_data_37,123.png
Saved violin plot for 'wind_data_37,124.25' as output\plot_violinplot_spd\Plant\violin_wind_data_37,124.25.png
Saved violin plot for 'wind_data_37,124.5' as output\plot_violinplot_spd\Plant\violin_wind_data_37,124.5.png
Saved violin plot for 'wind_data_37,124' as output\plot_violinplot_spd\Plant\violin_wind_data_37,124.png
Saved violin plot for 'wind_data_37.25,123.25' as output\plot_violinplot_spd\Plant\violin_wind_data_37.25,123.25.png
Saved violin plot for 'wind_data_37.25,123.5' as output\plot_violinplot_spd\Plant\violin_wind_data_37.25,123.5.png
Saved v