# Clean the data and resample

Import relevant modules

In [None]:
import pandas as pd 
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns

Select relevant columns

In [None]:
# Names of the columns to keep when the CSV is loaded.
col_list = [
    'TimeStamp',
    'Power_kW',
    'WindSpeed_mps',
]

Read data from csv file

In [None]:
# Load the source CSV, keeping only the columns listed in col_list.
df = pd.read_csv(
    r'C:\Users\AliMS\Documents\project\code\data\2018_567.csv',
    usecols=col_list,
)

Plot power vs wind speed

In [None]:
# Plot power against wind speed for the data as loaded and save the figure.
sns.relplot(x="WindSpeed_mps", y="Power_kW", data=df)
plt.savefig(
    r'C:\Users\AliMS\Documents\project\code\figures\original.png',
    dpi=300,
    bbox_inches='tight',
)

Convert the timestamps to datetime and set them as the index

In [None]:
# Parse the TimeStamp column into datetimes, then make it the frame's index.
df = (
    df.assign(TimeStamp=pd.to_datetime(df['TimeStamp']))
    .set_index('TimeStamp')
)

Remove values where power is negative

In [None]:
# Blank every column of rows with negative power so a later dropna removes them.
df.loc[df['Power_kW'].lt(0), :] = np.nan

Remove values where power exceeds 7500 kW, and values where power is 0 while wind speed is above the cut-in value of 3 m/s

In [None]:
# Blank rows whose power reading is above 7500 kW.
df.loc[df['Power_kW'].gt(7500), :] = np.nan
# Blank rows reporting zero power while the wind speed is above 3 m/s.
df.loc[df['Power_kW'].eq(0) & df['WindSpeed_mps'].gt(3), :] = np.nan

Remove values above the cut-out speed of 25 m/s, then drop every row flagged as invalid

In [None]:
# Blank rows whose wind speed is above 25 m/s.
df.loc[df['WindSpeed_mps'].gt(25), :] = np.nan
# Discard every row that contains a NaN in any column.
df = df.dropna(axis=0, how='any')

Plot power vs wind speed

In [None]:
# Plot power against wind speed after the threshold filters and save the figure.
sns.relplot(x="WindSpeed_mps", y="Power_kW", data=df)
plt.savefig(
    r'C:\Users\AliMS\Documents\project\code\figures\obvious.png',
    dpi=300,
    bbox_inches='tight',
)

Resample the data, taking the mean over 10 minutes

In [None]:
# Average the readings in each 10-minute bin and label the bin by its right edge.
# The `axis` keyword is intentionally omitted: resampling along the index is
# the default, and passing `axis` to resample() is deprecated in recent pandas.
df = df.resample('10min', label='right').mean()

Remove missing values

In [None]:
# Discard every row that contains a NaN in any column.
df = df.dropna(axis=0, how='any')

Plot power vs wind speed

In [None]:
# Plot power against wind speed for the 10-minute means and save the figure.
sns.relplot(x="WindSpeed_mps", y="Power_kW", data=df)
plt.savefig(
    r'C:\Users\AliMS\Documents\project\code\figures\resample.png',
    dpi=300,
    bbox_inches='tight',
)

Write the cleaned, resampled data to a new CSV file

In [None]:
# Save the frame to CSV; the index is written out with the default settings.
df.to_csv(
    path_or_buf=r'C:\Users\AliMS\Documents\project\code\data\10_2018_567.csv',
)