In [None]:
!pip install pandas matplotlib seaborn scipy
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import zscore

# Load the solar measurements from CSV. 'Timestamp' is parsed into datetimes
# at read time so the month can be extracted from it further down.
data = pd.read_csv(
    filepath_or_buffer='./data/solar_data.csv',
    parse_dates=['Timestamp'],
)

# Per-column descriptive statistics (count, mean, std, min, quartiles, max).
summary_stats = data.describe()

# Data Quality Check
# Number of missing entries in each column.
missing_values = data.isnull().sum()

# Per-column count of values lying more than three standard deviations away
# from the column mean.
#  * Only numeric columns are examined: mean/std are not defined for text
#    columns, and a timestamp "outlier" is not meaningful here.
#  * The absolute deviation is used so unusually LOW readings are counted as
#    well as unusually high ones (a signed comparison only catches the upper
#    tail).
_numeric = data.select_dtypes(include='number')
outliers = ((_numeric - _numeric.mean()).abs() > 3 * _numeric.std()).sum()

# Time Series Analysis
# Calendar month of each reading, taken from the parsed timestamp; it is the
# x-axis of the plot below.
data['Month'] = data['Timestamp'].dt.month

plt.figure(figsize=(10, 6))
# Draw GHI first, then DNI, on the same axes, each labelled with its column
# name for the legend.
for _column in ('GHI', 'DNI'):
    sns.lineplot(data=data, x='Month', y=_column, label=_column)
plt.legend()
plt.title('Solar Irradiance Trends')
plt.show()

# Correlation Analysis
# Pairwise correlation is computed over the numeric columns only. The frame
# also holds a datetime 'Timestamp' column (and may hold text columns);
# since pandas 2.0, DataFrame.corr() no longer drops those silently and
# raises instead. Selecting the numeric columns explicitly works on both
# old and new pandas versions.
correlation_matrix = data.select_dtypes(include='number').corr()
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm')
plt.title('Correlation Heatmap')
plt.show()

# Wind Analysis
# Scatter of WS (x) against WD (y); marker colour encodes BP and marker size
# encodes RH. scatterplot() returns the axes it drew on, so the title is set
# directly on that object.
sns.scatterplot(
    data=data,
    x='WS',
    y='WD',
    hue='BP',
    size='RH',
).set_title('Wind Analysis')
plt.show()

# Z-Score Analysis
# Standardise GHI to zero mean / unit variance. nan_policy='omit' keeps
# missing readings out of the mean and standard deviation: with the default
# ('propagate') a single NaN in the column turns EVERY z-score into NaN, and
# the outlier filter below would then silently select nothing. Missing
# readings still come out as NaN and are therefore never flagged.
data['Z_GHI'] = zscore(data['GHI'], nan_policy='omit')

# Rows whose GHI is more than three standard deviations from the mean,
# in either direction.
z_outliers = data[np.abs(data['Z_GHI']) > 3]

# Cleaned Data
# Keep only the rows in which every column holds a value (any single missing
# cell removes the whole row).
# NOTE(review): if the dataset has a column that is empty for every row, this
# drops all rows - confirm against the actual CSV.
cleaned_data = data.dropna(axis=0, how='any')

# Write the surviving rows to disk without the index column. Columns derived
# earlier in this script (e.g. 'Month') are part of `data` and are exported too.
cleaned_data.to_csv(path_or_buf='cleaned_solar_data.csv', index=False)


: 