# La Guajira wind energy analysis

In [6]:
import pandas as pd
import numpy as np

In [11]:
# Load the wind-speed and air-density CSV exports for La Guajira (2019).
# skiprows=3 skips the first three lines of each file before the header row
# (presumably export metadata -- confirm against the raw files).
wind_speed = pd.read_csv(
    "./datasets/windGuajira2019.csv",
    skiprows=3,
)
air_density = pd.read_csv(
    "./datasets/airDensityGuajira2019.csv",
    skiprows=3,
)

In [12]:
# Inspect wind speed dataframe: preview the first rows, then summarise the
# column dtypes and non-null counts.
print(wind_speed.head(), "\n\n")
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() appends a stray "None" to the output.
wind_speed.info()

               time        local_time  electricity  wind_speed
0  2019-01-01 00:00  2018-12-31 19:00        0.952      13.742
1  2019-01-01 01:00  2018-12-31 20:00        0.953      13.783
2  2019-01-01 02:00  2018-12-31 21:00        0.946      13.515
3  2019-01-01 03:00  2018-12-31 22:00        0.940      13.327
4  2019-01-01 04:00  2018-12-31 23:00        0.941      13.341 


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 4 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   time         8760 non-null   object 
 1   local_time   8760 non-null   object 
 2   electricity  8760 non-null   float64
 3   wind_speed   8760 non-null   float64
dtypes: float64(2), object(2)
memory usage: 273.9+ KB
None


In [14]:
# Inspect air density dataframe: preview the first rows, then summarise the
# column dtypes and non-null counts.
print(air_density.head(), "\n\n")
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() appends a stray "None" to the output.
air_density.info()

               time        local_time   rhoa
0  2019-01-01 00:00  2018-12-31 19:00  1.167
1  2019-01-01 01:00  2018-12-31 20:00  1.168
2  2019-01-01 02:00  2018-12-31 21:00  1.169
3  2019-01-01 03:00  2018-12-31 22:00  1.169
4  2019-01-01 04:00  2018-12-31 23:00  1.169 


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8760 entries, 0 to 8759
Data columns (total 3 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   time        8760 non-null   object 
 1   local_time  8760 non-null   object 
 2   rhoa        8760 non-null   float64
dtypes: float64(1), object(2)
memory usage: 205.4+ KB
None


Both dataframes contain the columns `time` and `local_time`, which hold datetime information that is currently stored as `object` dtype and needs to be converted to a proper datetime dtype. However, since both dataframes describe the same geographical area over the same time span, we can join them and drop the repeated columns.

In [54]:
# Build a cleaned wind-speed frame without touching the raw `wind_speed`:
# drop the columns that are not needed, parse `local_time` into real
# datetimes, and use it as the index so the frame can be joined by timestamp.
wind_speed_copy = (
    wind_speed
    .drop(columns=["time", "electricity"])
    .assign(local_time=lambda frame: pd.to_datetime(frame["local_time"]))
    .set_index("local_time")
)
wind_speed_copy.head()

Unnamed: 0_level_0,wind_speed
local_time,Unnamed: 1_level_1
2018-12-31 19:00:00,13.742
2018-12-31 20:00:00,13.783
2018-12-31 21:00:00,13.515
2018-12-31 22:00:00,13.327
2018-12-31 23:00:00,13.341


In [58]:
# Build a cleaned air-density frame without touching the raw `air_density`:
# drop the redundant `time` column, parse `local_time` into real datetimes,
# index by it, and give the `rhoa` column a descriptive name.
air_density_copy = (
    air_density
    .drop(columns="time")
    .assign(local_time=lambda frame: pd.to_datetime(frame["local_time"]))
    .set_index("local_time")
    .rename(columns={"rhoa": "air_density"})
)
air_density_copy.head()

Unnamed: 0_level_0,air_density
local_time,Unnamed: 1_level_1
2018-12-31 19:00:00,1.167
2018-12-31 20:00:00,1.168
2018-12-31 21:00:00,1.169
2018-12-31 22:00:00,1.169
2018-12-31 23:00:00,1.169


In [59]:
# Inspect both new dataframes to confirm the index dtype and the remaining
# columns after cleaning.
# DataFrame.info() prints its report itself and returns None, so it is called
# directly; wrapping it in print() emits a stray "None" after each report.
wind_speed_copy.info()
print("\n\n")  # blank lines separating the two reports
air_density_copy.info()

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8760 entries, 2018-12-31 19:00:00 to 2019-12-31 18:00:00
Data columns (total 1 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   wind_speed  8760 non-null   float64
dtypes: float64(1)
memory usage: 136.9 KB
None 


<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 8760 entries, 2018-12-31 19:00:00 to 2019-12-31 18:00:00
Data columns (total 1 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   air_density  8760 non-null   float64
dtypes: float64(1)
memory usage: 136.9 KB
None


In [60]:
# Count missing values per column in each cleaned dataframe and report them.
wind_null_counts = wind_speed_copy.isna().sum()
density_null_counts = air_density_copy.isna().sum()

print("Null values in wind speed dataframe:\n", wind_null_counts, "\n\n")
print("Null values in air density dataframe:\n", density_null_counts)


Null values in wind speed dataframe:
 wind_speed    0
dtype: int64 


Null values in air density dataframe:
 air_density    0
dtype: int64


In [61]:
# Combine wind speed and air density into a single frame. The join aligns rows
# on the shared `local_time` index; how="left" (the default, spelled out here)
# keeps every timestamp present in the wind-speed frame.
wind_df = wind_speed_copy.join(air_density_copy, how="left")
wind_df.head()

Unnamed: 0_level_0,wind_speed,air_density
local_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2018-12-31 19:00:00,13.742,1.167
2018-12-31 20:00:00,13.783,1.168
2018-12-31 21:00:00,13.515,1.169
2018-12-31 22:00:00,13.327,1.169
2018-12-31 23:00:00,13.341,1.169


In [62]:
# Summary statistics (count, mean, std, min, quartiles, max) for each numeric
# column of the joined dataframe.
wind_df.describe()

Unnamed: 0,wind_speed,air_density
count,8760.0,8760.0
mean,10.019084,1.156697
std,2.702837,0.008449
min,0.203,1.131
25%,8.25375,1.151
50%,10.3605,1.156
75%,12.05025,1.162
max,16.4,1.178
