## Solar Radiation Analysis

#### IMPORTING REQUIRED LIBRARIES

In [1]:
import datetime

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
# Apply matplotlib's 'dark_background' style sheet to the figures drawn after this cell.
plt.style.use('dark_background')

In [2]:
import sys
import os

# Make the project's helper modules (../scripts) importable from this notebook.
# The relative path is resolved against the kernel's current working directory.
# The membership check keeps the cell idempotent: re-running it does not add the
# same directory to sys.path a second time.
scripts_dir = os.path.abspath('../scripts')
if scripts_dir not in sys.path:
    sys.path.append(scripts_dir)

from utils import load_dataset
from utils import detect_outliers

### EXPLORATORY DATA ANALYSIS

#### IMPORTING DATASET

In [3]:
# Load the three site datasets through the project helper `load_dataset`.
# NOTE(review): the unpacking order (Benin, Sierra Leone, Togo) is assumed to match
# the order in which the helper returns them — confirm against scripts/utils.
benin, sierra, togo = load_dataset()

#### Data Quality Check

##### Data Quality check: Null values and duplicates

In [4]:
# Missing-value count for every column of the Benin frame.
benin.isna().sum()

Timestamp             0
GHI                   0
DNI                   0
DHI                   0
ModA                  0
ModB                  0
Tamb                  0
RH                    0
WS                    0
WSgust                0
WSstdev               0
WD                    0
WDstdev               0
BP                    0
Cleaning              0
Precipitation         0
TModA                 0
TModB                 0
Comments         525600
dtype: int64

**OBSERVATIONS**
* Of the 19 columns, only `Comments` has missing values, and it is entirely null (all 525,600 entries). It carries no information, so it should be dropped.

In [5]:
# Missing-value count for every column of the Sierra Leone frame.
sierra.isna().sum()

Timestamp             0
GHI                   0
DNI                   0
DHI                   0
ModA                  0
ModB                  0
Tamb                  0
RH                    0
WS                    0
WSgust                0
WSstdev               0
WD                    0
WDstdev               0
BP                    0
Cleaning              0
Precipitation         0
TModA                 0
TModB                 0
Comments         525600
dtype: int64

**OBSERVATIONS**
* Of the 19 columns, only `Comments` has missing values, and it is entirely null (all 525,600 entries). It carries no information, so it should be dropped.

In [6]:
# Missing-value count for every column of the Togo frame.
togo.isna().sum()

Timestamp             0
GHI                   0
DNI                   0
DHI                   0
ModA                  0
ModB                  0
Tamb                  0
RH                    0
WS                    0
WSgust                0
WSstdev               0
WD                    0
WDstdev               0
BP                    0
Cleaning              0
Precipitation         0
TModA                 0
TModB                 0
Comments         525600
dtype: int64

**OBSERVATIONS**
* Of the 19 columns, only `Comments` has missing values, and it is entirely null (all 525,600 entries). It carries no information, so it should be dropped.

In [7]:
# `Comments` is entirely null in all three frames (see the null counts above),
# so it is removed from each of them.
# errors="ignore" keeps this cell re-runnable: on a second run the column is
# already gone and the drop is a no-op instead of raising KeyError.
# The frames are rebound rather than mutated with inplace=True.
benin = benin.drop(columns="Comments", errors="ignore")
togo = togo.drop(columns="Comments", errors="ignore")
sierra = sierra.drop(columns="Comments", errors="ignore")

In [8]:
# Number of Benin rows that exactly repeat an earlier row (first occurrence not counted).
benin.duplicated(keep="first").sum()

np.int64(0)

In [9]:
# Number of Sierra Leone rows that exactly repeat an earlier row (first occurrence not counted).
sierra.duplicated(keep="first").sum()

np.int64(0)

In [10]:
# Number of Togo rows that exactly repeat an earlier row (first occurrence not counted).
togo.duplicated(keep="first").sum()

np.int64(0)

**OBSERVATIONS**
* None of the three datasets contains duplicate rows.

##### Data Quality check: Negative numbers

In [11]:
# Screen only genuinely numeric columns. `exclude='object'` would also pick up
# datetime/bool/category columns, and comparing a datetime column with 0 raises
# TypeError.
numeric_col = benin.select_dtypes(include='number').columns

# Boolean Series indexed by column name: True where the column holds at least one
# value below zero.
has_negative = (benin[numeric_col] < 0).any()
negative_col = has_negative[has_negative].index.to_list()
print(f"The negative columns are {negative_col}")

# Floor the negative readings at 0 instead of dropping rows, so the row count is
# unchanged. NOTE(review): this clips every numeric column that reports negatives;
# in this run only irradiance columns (GHI/DNI/DHI) were reported.
# Nothing is assigned when the list is empty, e.g. when the cell is re-run.
if negative_col:
    benin[negative_col] = benin[negative_col].clip(lower=0)

The negative columns are ['GHI', 'DNI', 'DHI']


In [12]:
# Screen only genuinely numeric columns. `exclude='object'` would also pick up
# datetime/bool/category columns, and comparing a datetime column with 0 raises
# TypeError.
numeric_col = sierra.select_dtypes(include='number').columns

# Boolean Series indexed by column name: True where the column holds at least one
# value below zero.
has_negative = (sierra[numeric_col] < 0).any()
negative_col = has_negative[has_negative].index.to_list()
print(f"The negative columns are {negative_col}")

# Floor the negative readings at 0 instead of dropping rows, so the row count is
# unchanged. NOTE(review): this clips every numeric column that reports negatives;
# in this run only irradiance columns (GHI/DNI/DHI) were reported.
# Nothing is assigned when the list is empty, e.g. when the cell is re-run.
if negative_col:
    sierra[negative_col] = sierra[negative_col].clip(lower=0)

The negative columns are ['GHI', 'DNI', 'DHI']


In [13]:
# Screen only genuinely numeric columns. `exclude='object'` would also pick up
# datetime/bool/category columns, and comparing a datetime column with 0 raises
# TypeError.
numeric_col = togo.select_dtypes(include='number').columns

# Boolean Series indexed by column name: True where the column holds at least one
# value below zero.
has_negative = (togo[numeric_col] < 0).any()
negative_col = has_negative[has_negative].index.to_list()
print(f"The negative columns are {negative_col}")

# Floor the negative readings at 0 instead of dropping rows, so the row count is
# unchanged. NOTE(review): this clips every numeric column that reports negatives;
# in this run only GHI was reported.
# Nothing is assigned when the list is empty, e.g. when the cell is re-run.
if negative_col:
    togo[negative_col] = togo[negative_col].clip(lower=0)

The negative columns are ['GHI']


##### Data Quality check: Outliers

In [6]:
# Run the project's `detect_outliers` helper on the Benin frame and keep the result
# in `benin_outliers` for later inspection.
benin_outliers = detect_outliers(benin)

# To see the individual outlier values as well, additionally run:
#   print(f'Outliers for Benin-malanville:\n{benin_outliers}')
print(
    'Benin-malanville',
    f'Columns that contain outliers:\n{list(benin_outliers.keys())}',
    sep='\n',
)

Benin-malanville
Columns that contain outliers:
['ModA', 'ModB', 'WS', 'WSgust']


In [7]:
# Run the project's `detect_outliers` helper on the Sierra Leone frame and keep the
# result in `sierra_outliers` for later inspection.
sierra_outliers = detect_outliers(sierra)

# To see the individual outlier values as well, additionally run:
#   print(f'Outliers for Sierraleone-bumbuna:\n{sierra_outliers}')
print(
    'Sierraleone-bumbuna',
    f'Columns that contain outliers:\n{list(sierra_outliers.keys())}',
    sep='\n',
)

Sierraleone-bumbuna
Columns that contain outliers:
['ModA', 'ModB', 'WS', 'WSgust']


In [8]:
# Run the project's `detect_outliers` helper on the Togo frame and keep the result
# in `togo_outliers` for later inspection.
togo_outliers = detect_outliers(togo)

# To see the individual outlier values as well, additionally run:
#   print(f'Outliers for togo-dapaong:\n{togo_outliers}')
print(
    'Togo-dapaong',
    f'Columns that contain outliers:\n{list(togo_outliers.keys())}',
    sep='\n',
)

Togo-dapaong
Columns that contain outliers:
['ModA', 'ModB', 'WS', 'WSgust']
