In [None]:
import datetime as dt
import json
import os
from io import StringIO

import matplotlib.pyplot as mplt
import numpy as np
import pandas as pd
import plotly.express as plt
import plotly.graph_objects as go
import requests
import seaborn as sns
from plotly.subplots import make_subplots

In [None]:
def connect_drill(query, caching=True, chunk_size: int = 0):
    """Send a SQL query to the Drill proxy and return the response as a DataFrame.

    Credentials are read from the environment variables ``DRILLUSERNAME`` and
    ``DRILLPASSWORD`` so that no secret has to be stored in the notebook.

    Parameters
    ----------
    query : str
        SQL statement; sent as the ``query`` field of the JSON request body.
    caching : bool, default True
        Sends ``Cache-Control: max-age=1440`` when true and ``max-age=0``
        otherwise.
    chunk_size : int, default 0
        Currently unused; kept so that existing callers keep working.

    Returns
    -------
    pandas.DataFrame or None
        The parsed response. ``None`` is returned (after printing the raw
        response body) when the body cannot be parsed by ``pd.read_json``.

    Raises
    ------
    RuntimeError
        If one of the credential environment variables is missing or empty.
    requests.exceptions.RequestException
        If the HTTP request itself fails (re-raised after printing a hint).
    """
    host = 'https://proxima.bigdata.fh-aachen.de:8047'

    # Never keep credentials in the notebook: they end up in version control
    # and in every shared copy of the file.
    username = os.environ.get("DRILLUSERNAME")
    password = os.environ.get("DRILLPASSWORD")
    if not username or not password:
        raise RuntimeError(
            "Drill credentials are missing. Set the environment variables "
            "DRILLUSERNAME and DRILLPASSWORD before running a query."
        )

    headers = {
        'Content-Type': 'application/json',
        'Authorization': '%s:%s' % (username, password),
        'Cache-Control': 'max-age=' + ('1440' if caching else '0'),
    }
    payload = {'query': str(query)}

    try:
        # Only the connect phase is bounded, so an unreachable proxy fails fast
        # while long-running queries are still allowed to finish.
        result = requests.post(host + '/query', json=payload, headers=headers,
                               timeout=(10, None))
    except requests.exceptions.RequestException:
        print("The drill-proxy is not reachable. Please check if you are in the FH-Aachen network.")
        raise

    try:
        # Wrap the body in StringIO: passing a literal JSON string to
        # read_json is deprecated in recent pandas versions.
        data = pd.read_json(StringIO(result.text))
    except ValueError:
        print("Something went wrong when converting the json string from the datasource to a pandas DataFrame.")
        print(result.text)
        return None

    if data.empty:
        print('Result of query is empty!')
        print('Query was: ' + str(query))
    return data

In [None]:
def get_PIR_presences(room: str = "H217"):
    """Load the sensor readings of one room and aggregate them to 5-minute means.

    Parameters
    ----------
    room : str, default "H217"
        Room number; one of ``H217``, ``H216`` or ``H215``. It is mapped to the
        name stored in the ``room`` column of the data source.

    Returns
    -------
    pandas.DataFrame
        One row per 5-minute bin with a ``timestamp`` column (time-zone-naive
        Europe/Berlin wall-clock time) and the rounded means of the numeric
        columns. Bins without readings contain NaN.

    Raises
    ------
    KeyError
        If ``room`` is not one of the known room numbers.
    RuntimeError
        If the query produced no usable data.
    """
    dict_rooms = {'H217': 'Elsen', 'H216': 'Galla', 'H215': 'Remmy'}
    if room not in dict_rooms:
        raise KeyError("Unknown room %r; expected one of %s" % (room, sorted(dict_rooms)))
    room = dict_rooms[room]

    query = """SELECT `timestamp`,`room`, `presence`, `co2_ppm`, `temperature_celsius`, `relative_humidity_percent` 
    FROM ipenv.data.`sensor_data` 
    WHERE `room` LIKE '{room}' 
    AND `timestamp` > 1627776000 
    ORDER BY `timestamp` ASC
    LIMIT 1000000""".format(room=room)

    pir_data = connect_drill(query, caching=True)
    if pir_data is None or pir_data.empty:
        raise RuntimeError("The sensor data query returned no usable data for room %r." % room)

    # Convert to Berlin wall-clock time. A fixed +2h offset is only right
    # during daylight-saving time; a real time-zone conversion also handles
    # the winter months.
    # NOTE(review): assumes the timestamps delivered by the source are UTC — confirm.
    timestamps = pir_data["timestamp"]
    if timestamps.dt.tz is None:
        timestamps = timestamps.dt.tz_localize("UTC")
    # Drop the tz info again so the column stays time-zone-naive as before.
    pir_data["timestamp"] = timestamps.dt.tz_convert("Europe/Berlin").dt.tz_localize(None)

    pir_data["presence"] = pir_data["presence"].astype(int)

    # numeric_only=True keeps the string column `room` out of the mean, which
    # would otherwise raise on pandas >= 2.0.
    return (
        pir_data.groupby(pd.Grouper(key="timestamp", freq="5min"))
        .mean(numeric_only=True)
        .round(0)
        .reset_index(drop=False)
    )

In [None]:
def encode_cyclical(data, col, max_val):
    """Add sine and cosine encodings of a periodic column to ``data``.

    The columns ``<col>_sin`` and ``<col>_cos`` are written into ``data``
    itself (the frame is modified in place) and the same frame is returned.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame that contains ``col``.
    col : str
        Name of the column to encode.
    max_val : number
        Value of ``col`` that corresponds to one full turn.
    """
    angle = 2 * np.pi * data[col] / max_val
    for suffix, func in (('_sin', np.sin), ('_cos', np.cos)):
        data[col + suffix] = func(angle)
    return data

In [None]:
# Fetch the 5-minute aggregated sensor readings for room H217.
df = get_PIR_presences(room="H217")

In [None]:
# Show the dtype of every column of the loaded frame.
df.dtypes

In [None]:
# Compare the CO2 series with the same series taken a fixed number of rows later.
# `df` holds one row per 5-minute bin, so 200 rows correspond to 1000 minutes.
CO2_SHIFT_ROWS = 200

df_test = df.copy()
# A negative shift moves values up: each row receives the value CO2_SHIFT_ROWS rows later.
df_test['co2_ppmShifted'] = df_test['co2_ppm'].shift(-CO2_SHIFT_ROWS)

# One figure with a primary and a secondary y-axis.
fig = make_subplots(specs=[[{"secondary_y": True}]])

fig.add_trace(
    go.Scatter(x=df_test['timestamp'], y=df_test['co2_ppm'], name="co2_ppm"),
    secondary_y=False,
)
fig.add_trace(
    go.Scatter(
        x=df_test['timestamp'],
        y=df_test['co2_ppmShifted'],
        name="co2_ppm, %d rows later" % CO2_SHIFT_ROWS,
    ),
    secondary_y=True,
)

fig.update_layout(
    title_text="CO2 concentration vs. the same series %d rows later" % CO2_SHIFT_ROWS
)
fig.update_xaxes(title_text="Time")
fig.update_yaxes(title_text="<b>CO2</b> [ppm]", secondary_y=False)
fig.update_yaxes(title_text="<b>CO2, shifted</b> [ppm]", secondary_y=True)

fig.show()

In [None]:
# Line plot of the CO2 readings over time.
# Note: `plt` is plotly.express in this notebook, not matplotlib.
fig = plt.line(
    df,
    x='timestamp',
    y='co2_ppm',
    title='CO2 concentration over time',
    labels={'timestamp': 'Time', 'co2_ppm': 'CO2 [ppm]'},
)
fig.show()

In [None]:
# Encode the time of day as a point on the unit circle.
# The position within the day is measured in seconds since midnight, which is
# linear in time. (An HHMMSS integer is not: it jumps from 125959 to 130000
# within one second, which distorts the sine/cosine encoding.)
SECONDS_PER_DAY = 24 * 60 * 60

df_test = df.assign(
    seconds_of_day=lambda d: (
        d['timestamp'].dt.hour * 3600
        + d['timestamp'].dt.minute * 60
        + d['timestamp'].dt.second
    )
)
# Reuse the notebook's helper, then keep the column names the next cell expects.
df_test = encode_cyclical(df_test, 'seconds_of_day', SECONDS_PER_DAY).rename(
    columns={'seconds_of_day_sin': 'hour_sin', 'seconds_of_day_cos': 'hour_cos'}
)
df_test['hour_sin'].plot()

In [None]:
# Sine vs. cosine component; equal axis scaling keeps the shape undistorted.
df_test.plot.scatter(x='hour_sin', y='hour_cos').set_aspect('equal')

In [None]:
# Keep only the rows whose timestamp falls on the 1st of August.
mask = (df['timestamp'].dt.month == 8) & (df['timestamp'].dt.day == 1)
df_test = df[mask]

In [None]:
#df_test = df_test.loc[df['version'].isin(['1.0.4'])]
#df_test.head(5000000)

In [None]:
# Add two derived columns:
#   epoch        - the timestamp cast to int64
#   time_elapsed - whole seconds since midnight of the same day
df_test = df_test.assign(
    epoch=lambda d: d['timestamp'].astype('int64'),
    time_elapsed=lambda d: (
        (d['timestamp'] - d['timestamp'].dt.normalize()) / pd.Timedelta(seconds=1)
    ).astype(int),
)

In [None]:
#Linear Regression
from sklearn.linear_model import LinearRegression

# Synthetic data: a line with slope 2 and intercept -1 plus random noise.
rng = np.random.RandomState(42)
x = 10 * rng.rand(100)
y = 2 * x - 1 + rng.rand(100)

# The estimator expects a 2-D feature matrix, hence the single-column reshape.
X = x.reshape(-1, 1)
model = LinearRegression(fit_intercept=True).fit(X, y)

# Evaluate the fitted line on an evenly spaced grid.
xfit = np.linspace(-1, 11)
Xfit = xfit.reshape(-1, 1)
yfit = model.predict(Xfit)

In [None]:
# Plot the sample points as markers and the fitted line on top.
fig = go.Figure()
fig.add_trace(go.Scatter(x=x, y=y, mode='markers'))
fig.add_trace(go.Scatter(x=xfit, y=yfit, mode='lines'))
fig.show()

In [None]:
# Load the 'iris' example dataset through seaborn and preview its first rows.
iris = sns.load_dataset('iris')
iris.head()

In [None]:
# Pairwise scatter plots of all columns, coloured by species.
sns.pairplot(data=iris, hue='species', height=1.5)

In [None]:
# Split the frame into the target column and the remaining feature columns.
y_iris = iris['species']
X_iris = iris.drop(columns='species')

In [None]:
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB

# Hold out part of the data so the classifier is scored on unseen rows.
Xtrain, Xtest, ytrain, ytest = train_test_split(X_iris, y_iris, random_state=1)

# Fit a Gaussian naive Bayes classifier and predict the held-out rows.
model = GaussianNB().fit(Xtrain, ytrain)
y_model = model.predict(Xtest)

# Share of held-out rows that were classified correctly.
accuracy_score(y_true=ytest, y_pred=y_model)

In [None]:
#Principal Component Analysis
from sklearn.decomposition import PCA

# Fit a two-component PCA on the features, then project them onto it.
model = PCA(n_components=2).fit(X_iris)
X_2D = model.transform(X_iris)

In [None]:
# Attach the two principal components to a separate frame instead of writing
# them into `iris`: otherwise re-running the cell that builds `X_iris` would
# pick up PCA1/PCA2 as input features.
iris_pca = iris.assign(PCA1=X_2D[:, 0], PCA2=X_2D[:, 1])
sns.lmplot(x="PCA1", y="PCA2", hue='species', data=iris_pca, fit_reg=False)