In [None]:
import os
import sys
import pandas as pd
import statsmodels.api as sm




In [None]:
# Make the sibling ../scripts directory importable so the local `eda` module
# can be found. The relative path is resolved against the current working
# directory by os.path.abspath.
scripts_dir = os.path.abspath(os.path.join('..', 'scripts'))
if scripts_dir not in sys.path:
    # Guard keeps sys.path free of duplicate entries when this cell is re-run.
    sys.path.append(scripts_dir)
from eda import Eda

In [None]:
# Load ../data/sierraleone-bumbuna.csv into a DataFrame.
# The path is relative, so it resolves against the current working directory.
df = pd.read_csv('../data/sierraleone-bumbuna.csv')


In [None]:
# Show the column labels of the loaded frame (rendered as the cell's output).
df.columns

In [None]:
# Preprocess the loaded data with Eda.preprocess_data.
# NOTE(review): df is rebound to the preprocessed result, so re-running this
# cell passes already-preprocessed data back in — confirm that is safe.
df = Eda.preprocess_data(df)

In [None]:
# Wrap the preprocessed frame in an Eda instance; later cells call its
# quality-check, wind, temperature, histogram, z-score and bubble-chart methods.
weather_analysis = Eda(data=df)

In [None]:

# Create the Eda instance used for the summary statistics below.
# NOTE(review): this wraps the same df as `weather_analysis`; the two could
# presumably share one instance if Eda keeps no per-instance state — TODO confirm.
stats = Eda(df)

In [None]:
# Compute summary statistics through the Eda instance and print them as text.
# NOTE(review): the return type of calculate_summary_statistics() is not
# visible here; presumably a DataFrame — TODO confirm.
summary = stats.calculate_summary_statistics()
print("Summary Statistics:\n", summary)

In [None]:
# Optional: uncomment the next line to save the summary statistics to a CSV file.
# summary.to_csv('summary_statistics.csv', index=True)

In [None]:
# Run the Eda data-quality check on the preprocessed data.
quality_check_results = weather_analysis.data_quality_check()

# Print each section of the result. The result is indexed by the keys
# 'missing_values', 'negative_values' and 'outliers'; a missing key would
# raise at the corresponding print.
print("Data Quality Check Results:")
print(f"Missing Values: {quality_check_results['missing_values']}")
print(f"Negative Values: {quality_check_results['negative_values']}")
print(f"Outliers: {quality_check_results['outliers']}")


In [None]:
# df['Timestamp'] = pd.to_datetime(df['Timestamp'])  # Ensure it's in datetime format
# df.set_index('Timestamp', inplace=True)           # Set as the DataFrame index


In [None]:
# Count missing entries per column, then print pandas' descriptive statistics
# for the preprocessed frame.
missing_per_column = df.isna().sum()
print("Missing Values:\n", missing_per_column)
describe_table = df.describe()
print("\nSummary Statistics:\n", describe_table)


In [None]:
# Create the Eda instance used by the time-series plotting cells below.
# NOTE(review): another wrapper around the same df as the earlier Eda
# instances; reuse may be possible if Eda is stateless — TODO confirm.
time_series = Eda(df)

In [None]:
# Plot the time series of the three irradiance columns (GHI, DNI, DHI) and
# the ambient-temperature column (Tamb).
time_series.plot_time_series(['GHI', 'DNI', 'DHI', 'Tamb'])

In [None]:
# Plot monthly patterns for GHI, DNI, DHI and Tamb.
time_series.plot_monthly_patterns(['GHI', 'DNI', 'DHI', 'Tamb'])


In [None]:
# Plot daily trends (hourly averages) for GHI, DNI, DHI and Tamb.
time_series.plot_daily_trends(['GHI', 'DNI', 'DHI', 'Tamb'])


In [None]:
# Detect and plot anomalies.
# NOTE(review): a single threshold (1000) is passed for all four columns,
# including Tamb; how plot_anomalies applies it is not visible here — confirm
# the value is meaningful for every column.
threshold_value = 1000  # Set an appropriate threshold for your data
time_series.plot_anomalies(['GHI', 'DNI', 'DHI', 'Tamb'], threshold=threshold_value)

In [None]:
# Create the Eda instance used by the correlation and pair-plot cells below.
# NOTE(review): yet another wrapper around the same df; reuse of an earlier
# instance may be possible if Eda is stateless — TODO confirm.
ca = Eda(df)

In [None]:
# Columns passed to the correlation matrix and pair plot below.
columns_of_interest = ['GHI', 'DNI', 'DHI', 'TModA', 'TModB', 'WS', 'WSgust', 'WD']

In [None]:
# Plot the correlation matrix for the columns of interest.
ca.plot_correlation_matrix(columns_of_interest)

In [None]:

# Plot the pair plot for the same columns of interest.
ca.plot_pairplot(columns_of_interest)


In [None]:

# Plot wind conditions against solar irradiance.
# The wind columns and the irradiance columns are passed as two separate lists.
wind_columns = ['WS', 'WSgust', 'WD']
solar_columns = ['GHI', 'DNI', 'DHI']
ca.plot_wind_vs_solar(wind_columns, solar_columns)

In [None]:

# Plot a wind rose, using WS as the wind-speed column and WD as the
# wind-direction column.
weather_analysis.plot_wind_rose(wind_speed_col='WS', wind_dir_col='WD')


In [None]:

# Plot a radial bar chart from the WS and WD columns, requesting 12 bins.
weather_analysis.plot_radial_bar(wind_speed_col='WS', wind_dir_col='WD', num_bins=12)


In [None]:
# Print the `columns` attribute of the data held by the Eda instance, to check
# which columns are available to the cells below.
print(weather_analysis.data.columns)


In [None]:
# Perform the temperature analysis, with Tamb as the temperature column, RH as
# the relative-humidity column and GHI as the solar-radiation column.
weather_analysis.analyze_temperature(temp_col='Tamb', rh_col='RH', solar_rad_col='GHI')


In [None]:
# Plot histograms for the irradiance, wind-speed and ambient-temperature columns.
weather_analysis.plot_histograms(columns=['GHI', 'DNI', 'DHI', 'WS', 'Tamb'])


In [None]:
# Columns submitted to the Z-score analysis.
columns_to_analyze = ['GHI', 'DNI', 'DHI', 'WS', 'Tamb']

# Run the analysis with a threshold of 3; the result is iterated with
# .items() below, yielding one entry per column.
flagged_points = weather_analysis.calculate_z_scores(
    columns=columns_to_analyze,
    threshold=3,
)

# Print a heading followed by the flagged entries for each column.
for col, flagged in flagged_points.items():
    print(f"Flagged points for {col}:", flagged, sep="\n")

In [None]:
# Create a bubble chart of GHI (x) against Tamb (y).
# WS is passed as the size column and RH as the color column, so the title
# names both encodings (the previous title attributed bubble size to RH).
weather_analysis.create_bubble_chart(
    x_col='GHI',
    y_col='Tamb',
    size_col='WS',
    color_col='RH',  # Optional: Use RH for bubble color
    title="GHI vs Tamb with Bubble Size Representing WS and Color Representing RH"
)
