In [246]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import date, datetime, timedelta
import plotly.offline as py
import plotly.graph_objs as go
from plotly import __version__
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
# Initialise plotly's notebook mode before any of the py.iplot calls further down.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly docs.
init_notebook_mode(connected=True)

In [247]:
# Load the scrubbed turbine readings and report the frame's (rows, columns).
turbine_readings = pd.read_csv(filepath_or_buffer='./data/WindData_scrubbed.csv')
print(turbine_readings.shape)

(2600, 86)


In [248]:
# Attach a synthetic, evenly spaced timestamp to every row of turbine_readings.
# The first row gets 2016-01-01 00:00:00; each later row is 1.68 h after the previous one.
# NOTE(review): 1.68 h equals 182 days * 24 h / 2600 rows, so the step was
# presumably chosen to spread the rows over 2016-01-01..2016-06-30 — confirm.
start_datetime = datetime(2016, 1, 1, 0, 0, 0)
reading_interval = timedelta(hours=1.68)
print(start_datetime)

# Multiply the step instead of accumulating into start_datetime, so that name
# still holds the first timestamp once the cell has run. timedelta arithmetic
# is exact, so the values match repeated addition.
datetime_list = [start_datetime + row * reading_interval
                 for row in range(turbine_readings.shape[0])]

print(len(datetime_list))
turbine_readings["timestamp"] = datetime_list

2016-01-01 00:00:00
2600


In [249]:
# Display the last five rows of the frame, timestamp column included.
turbine_readings.tail(n=5)

Unnamed: 0,kW_Turbine1,kW_Turbine2,kW_Turbine3,kW_Turbine4,kW_Turbine5,kW_Turbine6,kW_Turbine7,kW_Turbine8,kW_Turbine9,kW_Turbine10,...,kW_Turbine78,kW_Turbine79,kW_Turbine80,kW_Turbine81,kW_Turbine82,kW_Turbine83,kW_Turbine84,kW_Turbine85,kW_Turbine86,timestamp
2595,35.02,34.15,48.33,34.822,28.674,46.79,34.17,63.06,74.11,64.547,...,42.765,22.08,53.92,63.965,9.36,59.65,58.16,51.0,57.81,2016-06-30 15:36:00
2596,44.99,23.61,37.57,35.266,33.167,35.87,48.58,71.635,44.7,70.42,...,49.163,45.69,76.75,63.862,68.29,60.42,69.75,59.13,78.24,2016-06-30 17:16:48
2597,30.98,10.26,28.07,35.71,37.66,50.95,56.53,80.21,28.3,30.01,...,55.56,54.42,54.88,63.76,75.37,36.97,66.78,46.53,64.05,2016-06-30 18:57:36
2598,58.29,17.14,29.35,25.5,21.59,66.89,101.01,88.785,53.31,29.38,...,59.44,42.71,54.71,59.17,66.25,67.74,60.65,57.02,48.53,2016-06-30 20:38:24
2599,51.2,27.58,48.42,42.57,50.98,81.07,128.53,97.36,75.69,45.89,...,47.19,49.66,42.54,49.51,57.88,68.84,49.53,61.59,53.1,2016-06-30 22:19:12


In [250]:
# Plot the full time series of ten randomly chosen turbines as line traces.
# Every column except "timestamp" is treated as a turbine series.
turbine_names = [name for name in turbine_readings.columns if name != "timestamp"]

# Draw from a seeded generator so Restart & Run All always picks the same turbines.
sample_rng = np.random.RandomState(42)
random_turbines = sample_rng.choice(turbine_names, size=10, replace=False)
print(random_turbines)

traces = [
    go.Scatter(x=turbine_readings["timestamp"],
               y=turbine_readings[turbine],
               mode='lines',
               name=turbine)
    for turbine in random_turbines
]

# Title and axis labels let the figure stand on its own when the notebook is skimmed.
layout = go.Layout(title="Readings over time for 10 sampled turbines",
                   xaxis=dict(title="timestamp"),
                   yaxis=dict(title="reading"))
py.iplot(go.Figure(data=traces, layout=layout), filename='line-mode')

['kW_Turbine86' 'kW_Turbine85' 'kW_Turbine83' 'kW_Turbine35'
 'kW_Turbine80' 'kW_Turbine63' 'kW_Turbine67' 'kW_Turbine25'
 'kW_Turbine14' 'kW_Turbine38']


In [251]:
# Reshape from wide (one column per turbine) to long: one row per (timestamp, turbine) pair.
# The name turbine_readings is rebound to the long frame, so guard on the new
# column: running this cell a second time would otherwise melt the already-melted frame.
if "turbine_num" not in turbine_readings.columns:
    turbine_readings = turbine_readings.melt(id_vars="timestamp", var_name="turbine_num", value_name="reading")

In [252]:
# Derive a calendar-date column (datetime.date objects) from each row's timestamp.
turbine_readings = turbine_readings.assign(date=turbine_readings["timestamp"].dt.date)
print(turbine_readings.head())

            timestamp  turbine_num  reading        date
0 2016-01-01 00:00:00  kW_Turbine1   316.71  2016-01-01
1 2016-01-01 01:40:48  kW_Turbine1   282.87  2016-01-01
2 2016-01-01 03:21:36  kW_Turbine1   275.02  2016-01-01
3 2016-01-01 05:02:24  kW_Turbine1   360.92  2016-01-01
4 2016-01-01 06:43:12  kW_Turbine1   386.46  2016-01-01


In [253]:
# Average each turbine's readings per calendar day, then attach one column of
# daily station values per weather-station file.
daily_mean_turbine_reading = turbine_readings.groupby(["date", "turbine_num"], as_index=False)["reading"].mean()

station_files = ["./data_2016/weather_2016/projects/Alta X/GHCND_USW00003159/AWND_2016-01-01_2016-12-31.csv",
                 "./data_2016/weather_2016/projects/Alta X/GHCND_USW00053144/AWND_2016-01-01_2016-12-31.csv",
                 "./data_2016/weather_2016/projects/Alta XI/GHCND_USW00023187/AWND_2016-01-01_2016-12-31.csv"]


def _load_station_series(csv_path):
    """Read one station CSV and return a two-column frame: "date" plus the station's values.

    The "value" column is renamed to "<station>|<datatype>", both taken from the
    first row of the file. Anything after a "T" in the raw date string is dropped
    and the remainder is converted to datetime.date objects, so the result can be
    merged on "date" with a frame whose dates are datetime.date objects as well.
    """
    station = pd.read_csv(csv_path)
    value_column = station["station"].iloc[0] + "|" + station["datatype"].iloc[0]
    day = pd.to_datetime(station["date"].str.split("T").str[0]).dt.date
    return (station
            .assign(date=day)
            .rename(columns={"value": value_column})
            .loc[:, ["date", value_column]])


for station_file in station_files:
    # Left join: every (date, turbine) row is kept even when a station has no value for that day.
    daily_mean_turbine_reading = pd.merge(daily_mean_turbine_reading,
                                          _load_station_series(station_file),
                                          on="date", how="left")

print(daily_mean_turbine_reading.head())

         date   turbine_num     reading  GHCND:USW00003159|AWND  \
0  2016-01-01   kW_Turbine1  837.602667                     4.9   
1  2016-01-01  kW_Turbine10  759.655333                     4.9   
2  2016-01-01  kW_Turbine11  544.115333                     4.9   
3  2016-01-01  kW_Turbine12  823.235333                     4.9   
4  2016-01-01  kW_Turbine13  789.448667                     4.9   

   GHCND:USW00053144|AWND  GHCND:USW00023187|AWND  
0                     1.6                    19.0  
1                     1.6                    19.0  
2                     1.6                    19.0  
3                     1.6                    19.0  
4                     1.6                    19.0  


In [254]:
# Scatter one randomly chosen turbine's daily mean reading against each station column.
# Seeded generator: the same turbine is picked on every Restart & Run All.
sample_rng = np.random.RandomState(42)
random_turbines = sample_rng.choice(turbine_names, size=1, replace=False)
print(random_turbines)
traces = []

for turbine in random_turbines:
    filtered_turbine = daily_mean_turbine_reading.loc[daily_mean_turbine_reading["turbine_num"] == turbine]

    # Station columns are the ones whose name starts with "GHCND"; one marker trace per column.
    station_columns = [column for column in filtered_turbine.columns if column.startswith("GHCND")]
    for column in station_columns:
        traces.append(go.Scatter(x=filtered_turbine[column],
                                 y=filtered_turbine["reading"],
                                 name=column,
                                 mode='markers'))

# Title and axis labels let the figure stand on its own when the notebook is skimmed.
layout = go.Layout(title="Daily mean reading vs. station value ({})".format(", ".join(random_turbines)),
                   xaxis=dict(title="station value"),
                   yaxis=dict(title="daily mean reading"))
py.iplot(go.Figure(data=traces, layout=layout), filename='basic-scatter')

['kW_Turbine15']
