## Importing Libraries and Datasets

In [None]:
# Importing libraries 
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
from windrose import WindroseAxes

In [None]:
# Read the raw CSV file of each site into its own DataFrame.
# data_1 -> Benin Malanville, data_2 -> Sierraleone Bumbuna, data_3 -> Togo Dapaong QC.
benin_csv = "../data/benin-malanville.csv"
sierraleone_csv = "../data/sierraleone-bumbuna.csv"
togo_csv = "../data/togo-dapaong_qc.csv"

data_1_Uncleaned = pd.read_csv(benin_csv)
data_2_Uncleaned = pd.read_csv(sierraleone_csv)
data_3_Uncleaned = pd.read_csv(togo_csv)

## Data Cleaning

In [None]:
# Remove the columns that contain no data at all (every value missing).
# Rows are left untouched and the raw frames are not modified.
empty_column_filter = {"axis": 1, "how": 'all'}

data_1 = data_1_Uncleaned.dropna(**empty_column_filter)
data_2 = data_2_Uncleaned.dropna(**empty_column_filter)
data_3 = data_3_Uncleaned.dropna(**empty_column_filter)

## Data Quality Check


In [None]:
# Count the missing entries in every column of the dataset chosen by the user.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
# The typed name is resolved to the variable of the same name in this namespace.
name_of_data = locals()[dataset_key]

missing_value = name_of_data.isna().sum()
print("Missing Values in the data are:\n", missing_value)

In [None]:
# List the rows in which at least one irradiance reading (GHI, DNI or DHI) is below zero.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

irradiance_columns = ['GHI', 'DNI', 'DHI']
# One boolean per row: True when any of the three columns is negative.
has_negative_reading = (name_of_data[irradiance_columns] < 0).any(axis=1)
negative_value = name_of_data[has_negative_reading]
print("Negative values in the data are:\n", negative_value)

## General statistics analysis of datasets

In [None]:
# Show the descriptive statistics table of the dataset chosen by the user.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

# Kept as the last expression so the notebook renders the resulting table.
name_of_data.describe()

## Temperature Analysis

In [None]:
# Scatter each module temperature (TModA, TModB) against the ambient
# temperature (Tamb), one panel per module, side by side.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

# (module column, marker colour, panel title, y-axis label)
temperature_panels = [
    ('TModA', 'blue', 'TModA vs Tamb', 'Module Temperature A (TModA)'),
    ('TModB', 'red', 'TModB vs Tamb', 'Module Temperature B (TModB)'),
]

for position, (module_column, colour, heading, y_label) in enumerate(temperature_panels, start=1):
    plt.subplot(1, 2, position)
    plt.scatter(name_of_data['Tamb'], name_of_data[module_column], color=colour, alpha=0.5)
    plt.title(heading)
    plt.xlabel('Ambient Temperature (Tamb)')
    plt.ylabel(y_label)

plt.tight_layout()
plt.show()

## Correlation Analysis

In [None]:
# Report how strongly the ambient temperature correlates with each module
# temperature (pandas' default correlation method is used).
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

ambient_temperature = name_of_data['Tamb']
correlation_of_TModA = ambient_temperature.corr(name_of_data['TModA'])
correlation_of_TModB = ambient_temperature.corr(name_of_data['TModB'])

# Coefficients are rounded to four decimals for display only.
print(f"Correlation coefficient between Tamb and TModA: {round(correlation_of_TModA, 4)}")
print(f"Correlation coefficient between Tamb and TModB: {round(correlation_of_TModB, 4)}")

## Time Series Analysis

In [None]:
# Display Time Series Analysis
# Plot GHI, DNI, DHI and Tamb against the timestamp in a 2x2 grid.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

# Work on a time-indexed copy rather than calling set_index(..., inplace=True)
# on the shared dataset: the in-place call removes the 'Timestamp' column, so
# re-running this cell, or running another cell that reads 'Timestamp',
# failed with a KeyError.
if 'Timestamp' in name_of_data.columns:
    time_indexed = name_of_data.assign(
        Timestamp=pd.to_datetime(name_of_data['Timestamp'])
    ).set_index('Timestamp')
elif name_of_data.index.name == 'Timestamp':
    # An earlier in-place run already moved 'Timestamp' to the index.
    time_indexed = name_of_data
else:
    raise KeyError("'Timestamp' is neither a column nor the index of the selected dataset")

# (column, line colour, panel title); the column name doubles as the y-axis label.
time_series_panels = [
    ('GHI', 'blue', 'Global Horizontal Irradiance'),
    ('DNI', 'red', 'Direct Normal Irradiance'),
    ('DHI', 'green', 'Diffuse Horizontal Irradiance'),
    ('Tamb', 'orange', 'Ambient Temperature'),
]

for position, (column, colour, heading) in enumerate(time_series_panels, start=1):
    plt.subplot(2, 2, position)
    plt.plot(time_indexed[column], color=colour)
    plt.title(heading)
    plt.xlabel('Timestamp')
    plt.ylabel(column)

plt.tight_layout()
plt.show()

## Wind Analysis

In [None]:
# Display Wind Analysis
# Plot the wind speed series (WS, WSgust, WSstdev) and the wind direction
# series (WD, WDstdev) against the timestamp, one panel each.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

# Work on a time-indexed copy rather than calling set_index(..., inplace=True)
# on the shared dataset: the in-place call removes the 'Timestamp' column, so
# this cell raised a KeyError whenever the dataset had already been indexed by
# a previous run of this cell or of another cell doing the same.
if 'Timestamp' in name_of_data.columns:
    time_indexed = name_of_data.assign(
        Timestamp=pd.to_datetime(name_of_data['Timestamp'])
    ).set_index('Timestamp')
elif name_of_data.index.name == 'Timestamp':
    # An earlier in-place run already moved 'Timestamp' to the index.
    time_indexed = name_of_data
else:
    raise KeyError("'Timestamp' is neither a column nor the index of the selected dataset")

# (panel title, y-axis label, [(column, line colour), ...]); each line is
# labelled with its column name in the legend.
wind_panels = [
    ('Wind Speed', 'Wind Speed (m/s)',
     [('WS', 'blue'), ('WSgust', 'green'), ('WSstdev', 'red')]),
    ('Wind Direction', 'Wind Direction (degrees)',
     [('WD', 'blue'), ('WDstdev', 'green')]),
]

for position, (heading, y_label, series) in enumerate(wind_panels, start=1):
    # Only two panels are drawn, so use a 1x2 grid; the former 2x2 grid left
    # the lower half of the figure empty.
    plt.subplot(1, 2, position)
    for column, colour in series:
        plt.plot(time_indexed[column], color=colour, label=column)
    plt.title(heading)
    plt.xlabel('Timestamp')
    plt.ylabel(y_label)
    plt.legend()

plt.tight_layout()
plt.show()

In [None]:
# Draw the distribution of the wind speed (WS) as a 20-bin histogram with a
# kernel density estimate overlaid.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

plt.figure(figsize=(5, 6))
# histplot draws on the current axes and returns them, so label through the handle.
histogram_axes = sns.histplot(name_of_data['WS'], bins=20, kde=True, color='blue')
histogram_axes.set_title('Histogram of Wind Speed')
histogram_axes.set_xlabel('Wind Speed (m/s)')
histogram_axes.set_ylabel('Frequency')
plt.show()

In [None]:
# Display Wind Rose
# Bar-style wind rose of wind direction (WD) against wind speed (WS) for the
# dataset chosen by the user.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

# Pass the figure to from_ax(): called without one it opens a figure of its
# own, which left the 10x10 figure created here blank and its size unused.
fig = plt.figure(figsize=(10, 10))
ax = WindroseAxes.from_ax(fig=fig)
ax.bar(name_of_data['WD'], name_of_data['WS'], normed=True, opening=0.8, edgecolor='white')
ax.set_legend(title='Wind Speed (m/s)')
# Title the wind rose axes explicitly instead of relying on the "current axes".
ax.set_title('Wind Rose Plot')
plt.show()

## Plotting

In [None]:
# Display Box Plot of a dataset
# Box plots of the irradiance and temperature columns of the chosen dataset.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

# Human-readable site names, so the title matches the dataset actually
# plotted (it used to read "Benin Malanville" for every dataset).
dataset_labels = {
    'data_1': 'Benin Malanville',
    'data_2': 'Sierraleone Bumbuna',
    'data_3': 'Togo Dapaong QC',
}

target_columns_to_look_1 = ['GHI', 'DNI', 'DHI', 'Tamb', 'TModA', 'TModB']
plt.figure(figsize=(10, 6))
name_of_data[target_columns_to_look_1].boxplot()
# Fall back to the typed name when it is not one of the three known datasets.
plt.title('Box Plot of ' + dataset_labels.get(dataset_key, dataset_key))
plt.ylabel('Value')
plt.xticks(rotation=45)
plt.show()

In [None]:
# Display a histogram of one column of a dataset
# Both the dataset and the column are chosen by the user.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_colmun = input("Which colmun of data do you want to display: ")
name_of_data = locals()[dataset_key]

# A single histogram gets a figure of its own; the former plt.subplot(3, 2, 1)
# squeezed it into one sixth of the canvas.
plt.figure()
# Missing values are dropped first because plt.hist cannot bin NaN entries.
plt.hist(name_of_data[name_of_colmun].dropna(), bins=20, color='blue', alpha=0.7)
plt.title('Histogram of '+ name_of_colmun)
plt.xlabel(name_of_colmun)
plt.ylabel('Frequency')
# Render explicitly, as the other plotting cells do.
plt.show()

In [None]:
# Draw one scatter panel per (x column, y column) pair of the chosen dataset,
# all panels on a single row.
dataset_key = input("Enter dataset name here (info: data_1 is Benin Malanville, data_2 is Sierraleone Bumbuna and data_3 is Togo Dapaong QC): ")
name_of_data = locals()[dataset_key]

scatter_plot = [('GHI', 'Tamb'), ('WS', 'WSgust')]
panel_count = len(scatter_plot)

plt.figure(figsize=(12, 6))
for position, (x_column, y_column) in enumerate(scatter_plot, start=1):
    plt.subplot(1, panel_count, position)
    plt.scatter(name_of_data[x_column], name_of_data[y_column], alpha=0.5)
    plt.title(f'{x_column} vs. {y_column}')
    plt.xlabel(x_column)
    plt.ylabel(y_column)
plt.tight_layout()