# TimeSeries DataAnalytics Tutorial



In [None]:
import visdom
import numpy as np
import chart_studio.plotly as py
import plotly.express as px
import plotly.tools as tls
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import pandas as pd
from datetime import datetime
import requests
import json
import matplotlib.pyplot as plt

## I/ Requête des données

### A/ Données SmartGrid

### B/ Données Eolienne

In [None]:
def find_data(metric, col):
    """Query KairosDB for the datapoints of one column of a metric.

    Args:
        metric: Name of the KairosDB metric to query.
        col: Value of the ``column`` tag that selects the series.

    Returns:
        The raw ``requests.Response`` of the POST to
        ``/api/v1/datapoints/query``; callers decode it with ``.json()``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer in time.
    """
    query = {
        # Absolute start of the queried time range.
        "start_absolute": 1,
        "metrics": [
            {
                "name": metric,
                "tags": {
                    'column': [col],
                },
            },
        ],
    }
    # Without a timeout, requests waits indefinitely on an unresponsive
    # server, which would freeze the notebook cell.
    return requests.post(
        kairosdb_server + "/api/v1/datapoints/query",
        data=json.dumps(query),
        timeout=60,
    )

In [None]:
def type_convert(df):
    """Cast every column of ``df`` to a numeric dtype, in place.

    Each column is replaced by the result of ``pd.to_numeric``; the function
    mutates ``df`` and returns nothing.
    """
    for column_name in df.columns:
        numeric_values = pd.to_numeric(df[column_name])
        df[column_name] = numeric_values

In [None]:
def json_to_df(metric):
    """Fetch every known column of ``metric`` and assemble them in a DataFrame.

    One query is issued per column through ``find_data``. Each returned
    datapoint is indexed as ``point[0]`` (stored as the timestamp) and
    ``point[1]`` (stored as the value). The ``timestamp`` column is filled
    from the datapoints of the 'Temps écoulé' series only.

    Args:
        metric: Name of the metric passed on to ``find_data``.

    Returns:
        A DataFrame whose first column is ``timestamp`` followed by one
        column per queried series.
    """
    value_columns = [
        'Temps écoulé', '4069 state', 'Battery voltage', 'Flow SP',
        'Unit Code', 'Flow M', 'Pressure', 'Temperature', 'Latitude',
        'Longitude', 'Altitude', 'Head. Rel. True North', 'Pressure.1',
        'Temperature.1', 'Humidity', 'MDA Wnd Dir', 'MDA Wnd Speed',
        'MWD Wind Dir', 'MWD Wind Speed', 'Gaz Concentration 1',
        'Gaz Concentration 2', 'Gaz Concentration 3', 'Gaz Concentration 4',
        'SPA 1', 'SPA 2', 'Cellule Photo', 'Temperature.2', 'Pressure.2',
        'Flow MassFlow 1', 'NOTUSED Flow MassFlow 2', 'Flow', 'Humidity.1',
        'Test', 'Details', 'SPA 3', 'SPA 4', 'CavityPressure', 'CavityTemp',
        'CH4', 'CH4_dry', 'C2H6', 'C2H6_dry', '13CH4', 'H2O', 'CO2',
        'C2C1Ratio', 'Delta_iCH4_Raw', 'HP_Delta_iCH4_30s',
        'HP_Delta_iCH4_2min', 'HP_Delta_iCH4_5min',
    ]

    # Declare all columns up front so the final column order is fixed.
    df = pd.DataFrame(columns=['timestamp'] + value_columns)

    for col in value_columns:
        payload = find_data(metric, col).json()
        points = payload['queries'][0]['results'][0]['values']
        df[col] = [point[1] for point in points]
        if col == 'Temps écoulé':
            # This series alone provides the shared timestamp column.
            df['timestamp'] = [point[0] for point in points]
    return df

In [None]:
# Base URL of the KairosDB REST server; read as a module-level name by
# find_data(). (A `global` statement at module level has no effect, so it
# is not needed here.)
kairosdb_server = "http://localhost:9080"
# Name of the metric loaded by this notebook.
metric = "test_eolienne"

In [None]:
# Load every column of the metric into a DataFrame, then cast all columns
# to numeric dtypes in place.
df=json_to_df(metric)
type_convert(df)

In [None]:
# Display the loaded DataFrame.
df

In [None]:
from datetime import timezone

# Convert the millisecond timestamps to UTC date strings for the x axis.
# datetime.utcfromtimestamp() is deprecated since Python 3.12; the
# timezone-aware fromtimestamp() produces the same UTC wall-clock values.
x = [
    datetime.fromtimestamp(item / 1000, tz=timezone.utc).strftime('%Y-%m-%d %H:%M:%S')
    for item in df['timestamp']
]

In [None]:
# Display the formatted date strings.
x

In [None]:
x_ref = df['timestamp'].tolist()
fig = go.Figure()

# One line trace per plotted column, each with its own colour.
for column, colour in (("Latitude", 'deepskyblue'), ("Temperature", 'dimgray')):
    fig.add_trace(go.Scatter(
        x=x,
        y=df[column],
        name=column,
        line_color=colour,
        opacity=0.8,
    ))

# The x-axis range is given as raw timestamps, with both bounds shifted back
# by 3600*2*1000.
# NOTE(review): presumably a two-hour timezone offset in milliseconds; confirm.
range_shift = 3600*2*1000
fig.update_layout(
    xaxis_range=[x_ref[0]-range_shift, x_ref[-1]-range_shift],
    title_text="éolienne data series",
)
fig.show()

### C/ Données de pollution

## II/ Statistiques

In [None]:
from sklearn.linear_model import LinearRegression


In [None]:
# Regression inputs reshaped to single-column 2-D arrays:
# timestamp is the explanatory variable, Temperature the target.
lr_x, lr_y = (
    df[column].values.reshape(-1, 1)
    for column in ('timestamp', 'Temperature')
)

In [None]:
# Fit a linear model of Temperature (lr_y) as a function of timestamp (lr_x).
lrModel = LinearRegression()
lrModel.fit(lr_x,lr_y)

In [None]:
# Score of the fitted model, evaluated on the same data it was fitted on.
lrModel.score(lr_x,lr_y)

In [None]:
# Intercept of the fitted line.
alpha = lrModel.intercept_[0]
alpha

In [None]:
# Slope of the fitted line (coefficient of the single feature).
beta = lrModel.coef_[0][0]
beta

In [None]:
# Value of the fitted line at every timestamp; same shape as lr_x.
pred = alpha + beta*lr_x
pred

In [None]:
# Detrend the temperature by subtracting the fitted linear trend.
# The original cell used `temp_ori` and `res` without defining them in any
# cell, so a fresh run failed with NameError.
# NOTE(review): they are rebuilt here as the raw temperature (indexed by
# date) and the linear prediction `pred`; confirm this is the intended
# treatment.
temp_ori = pd.Series(
    df['Temperature'].to_numpy(),
    index=pd.to_datetime(df['timestamp'].to_numpy(), unit='ms'),
    name='Temperature',
)
# pred has one column; flatten it so it lines up with the Series values.
res = pred.ravel()
temp = temp_ori - res
temp

In [None]:
# Plot the treated temperature series as a thin line on a single axes.
fig, ax = plt.subplots(figsize=(6, 3))
temp.plot(ax=ax, linewidth=.5)
ax.set_xlabel('Date')
ax.set_ylabel('Mean temperature after treatement')

## III/ FFT, PSD : analyse en fréquence du signal

Notebook [FFTTuto](./fft.ipynb) illustrant l'utilisation des FFT et PSD sur des données temporelles


In [None]:
import scipy as sp
import scipy.fftpack

In [None]:
#Calcul fft
# Compute the FFT of the temperature values.
temp_fft = sp.fftpack.fft(temp.values)

In [None]:
#Calcul psd
# Compute the power spectral density: squared magnitude of the FFT.
temp_psd = np.abs(temp_fft) ** 2

In [None]:
# Frequency bins matching temp_psd. The second argument of fftfreq is the
# sample spacing. The original `1. / 365*24*3600` parsed as
# (1/365)*24*3600 (about 236.7) and was unrelated to the data.
# The spacing is now taken from the timestamps themselves (milliseconds
# converted to seconds), so the bins are in 1/sec.
# NOTE(review): frequency thresholds tuned against the previous axis may
# need revisiting.
sample_spacing = df['timestamp'].diff().median() / 1000
fftfreq = sp.fftpack.fftfreq(len(temp_psd), sample_spacing)

In [None]:
# Boolean mask selecting the strictly positive frequencies.
i = fftfreq > 0

In [None]:
# Plot the PSD in decibels over the positive frequencies only.
fig, ax = plt.subplots(figsize=(8, 4))
psd_db = 10 * np.log10(temp_psd[i])
ax.plot(fftfreq[i], psd_db)
ax.set_xlabel('Frequency (1/sec)')
ax.set_ylabel('PSD (dB)')

In [None]:
#Extraction des fréquences les plus importantes (FFT tronquée)
temp_fft_bis = temp_fft.copy()
temp_fft_bis[np.abs(fftfreq) > 0.0005] = 0

In [None]:
#Calcul de la courbe de température inverse de la FFT tronquée
temp_slow = np.real(sp.fftpack.ifft(temp_fft_bis))
fig, ax = plt.subplots(1, 1, figsize=(6, 3))
ax.plot_date(date, temp_slow, '-')
temp.plot(ax=ax, lw=.5)
#ax.set_xlim(df['timestamp'][0],
#            df['timestamp'][132])
#ax.set_ylim(-10, 40)
ax.set_xlabel('Date')
ax.set_ylabel('Mean temperature')

## IV/ Clustering


## V/ Apprentissage