In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import pylab as pl
import Plot_Induction_Figure as pif

# Proyecto FAA

Jorge Durán, Jaime Enríquez, Marcos de las Heras

## Pre-análisis de datos

**Metadata**:

In [2]:
# Fix for the mis-read metadata header: the tab-separated read leaves an
# "Unnamed: 2" column and shifts the names "class", "t0" and "dt" one position
# to the right of their data. Drop the surplus last column ("dt") and move the
# remaining names back onto the columns they describe.
sensor_m = (
    pd.read_csv("data/HT_Sensor_metadata.dat", sep="\t")
    .drop(columns=["dt"])
    .rename(columns={"t0": "dt", "class": "t0", "Unnamed: 2": "class"})
)

columns_metadata = ["id", "date", "class", "t0", "dt"]
# Same file as a raw string array (header row skipped), plus a float copy of
# columns 0, 3 and 4 (id, t0 and dt according to columns_metadata).
metadata = np.loadtxt('data/HT_Sensor_metadata.dat', skiprows=1, dtype=str)
metadata_aux = metadata[:, [0, 3, 4]].astype(float)

In [3]:
# Preview the first rows of the metadata table after the header correction.
sensor_m.head()

Unnamed: 0,id,date,class,t0,dt
0,0,07-04-15,banana,13.49,1.64
1,1,07-05-15,wine,19.61,0.54
2,2,07-06-15,wine,19.99,0.66
3,3,07-09-15,banana,6.49,0.72
4,4,07-09-15,wine,20.07,0.53


In [4]:
# Column dtypes and non-null counts of the metadata table.
sensor_m.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 100 entries, 0 to 99
Data columns (total 5 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   id      100 non-null    int64  
 1   date    100 non-null    object 
 2   class   100 non-null    object 
 3   t0      100 non-null    float64
 4   dt      100 non-null    float64
dtypes: float64(2), int64(1), object(2)
memory usage: 4.0+ KB


In [5]:
# Summary statistics of the numeric metadata columns (id, t0, dt).
sensor_m.describe()

Unnamed: 0,id,t0,dt
count,100.0,100.0,100.0
mean,49.5,13.1786,0.7516
std,29.011492,4.799102,0.428424
min,0.0,5.62,0.12
25%,24.75,8.32,0.48
50%,49.5,13.385,0.62
75%,74.25,16.38,0.9325
max,99.0,23.62,2.31


**Dataset**:

In [None]:
columns_dataset = ["id","time", "R1","R2","R3","R4","R5","R6","R7","R8", "Temp.", "Humidity"]

# Fix for the mis-read dataset: after splitting on two spaces, every column
# that contains at least one missing value is discarded, and the remaining
# columns are given the twelve expected names.
sensor_d = pd.read_csv("data/HT_Sensor_dataset.dat", sep="  ", engine="python").dropna(axis=1)
sensor_d.columns = list(columns_dataset)

# Same file as a plain NumPy array (header row skipped).
dataset = np.loadtxt('data/HT_Sensor_dataset.dat', skiprows=1)

In [None]:
# Preview the first rows of the measurement table.
sensor_d.head()

In [None]:
# Column dtypes and non-null counts of the measurement table.
sensor_d.info()

In [None]:
# Summary statistics of the numeric measurement columns.
sensor_d.describe()

* Frecuencia de cada clase en los experimentos:

In [None]:
# Histogram of the "class" column of the metadata table.
sensor_m["class"].hist()

* Número de medidas tomadas en sensor_d por experimento:

In [None]:
# Number of rows in sensor_d for each experiment id
print(sensor_d["id"].value_counts())
# Histogram of the id column, with as many bins as there are metadata rows
sensor_d.id.hist(bins=sensor_m.shape[0])

* Gráficas para sensores, temperatura y humedad para cada experimento:

In [None]:
# Sensores R, temperatura y humedad para caso concreto (id = X)
def graficasSensores(id=38):
    """Plot the eight sensor channels of one experiment, one subplot per channel.

    Reads the module-level frames ``sensor_d`` (measurements) and ``sensor_m``
    (metadata). The figure title shows the experiment id and its class label.

    Parameters
    ----------
    id : value matched against the ``id`` column of both frames (default 38).

    Raises
    ------
    IndexError
        If no row of ``sensor_m`` has the requested id.
    """
    measurements = sensor_d.loc[sensor_d["id"] == id]

    fig, axs = plt.subplots(8)
    label = sensor_m.loc[sensor_m["id"] == id, "class"].values[0]
    fig.suptitle('Sensores R1-R8, id='+str(id)+", "+label)

    # Channel k is read from column position k + 1, i.e. positions 2..9 of the
    # frame (R1..R8 in the column names assigned when sensor_d is loaded).
    for channel, ax in enumerate(axs, start=1):
        ax.plot(measurements.iloc[:, channel + 1])
        ax.set_title("R"+str(channel))

def graficaTemp(id=38):
    """Plot the ``Temp.`` readings of one experiment.

    Reads the module-level frames ``sensor_d`` (measurements) and ``sensor_m``
    (metadata). The title shows the experiment id and its class label.

    Parameters
    ----------
    id : value matched against the ``id`` column of both frames (default 38).

    Raises
    ------
    IndexError
        If no row of ``sensor_m`` has the requested id.
    """
    measurements = sensor_d.loc[sensor_d["id"] == id]
    label = sensor_m.loc[sensor_m["id"] == id, "class"].values[0]

    # Title and axis labels are set first; the series is drawn last.
    plt.title("Temperature, id="+str(id)+", "+label)
    plt.xlabel("Num. Experimento")
    plt.ylabel("Grados ºC")
    measurements["Temp."].plot()
    
def graficaHum(id=38):
    """Plot the ``Humidity`` readings of one experiment.

    Reads the module-level frames ``sensor_d`` (measurements) and ``sensor_m``
    (metadata). The title shows the experiment id and its class label.

    Parameters
    ----------
    id : value matched against the ``id`` column of both frames (default 38).

    Raises
    ------
    IndexError
        If no row of ``sensor_m`` has the requested id.
    """
    measurements = sensor_d.loc[sensor_d["id"] == id]
    label = sensor_m.loc[sensor_m["id"] == id, "class"].values[0]

    # Title and axis labels are set first; the series is drawn last.
    plt.title("Humidity, id="+str(id)+", "+label)
    plt.xlabel("Num. Experimento")
    plt.ylabel("% Humidity")
    measurements["Humidity"].plot()

In [None]:
# Calls the project helper from Plot_Induction_Figure with the raw NumPy
# arrays loaded above. NOTE(review): presumably draws the induction figure;
# the helper is defined outside this notebook, so confirm there.
pif.graficaInduccion(metadata=metadata, dataset=dataset)

In [None]:
# Sensor channels R1-R8 for the function's default experiment (id=38).
graficasSensores()

In [None]:
# Temperature readings for the function's default experiment (id=38).
graficaTemp()

In [None]:
# Humidity readings for the function's default experiment (id=38).
graficaHum()

* Gráfica de temperatura media vs. humedad media por experimento:

In [None]:
def plotTempHum(poblacion, c):
    """Scatter the mean ``Temp.`` against the mean ``Humidity`` of each ``id`` group.

    Parameters
    ----------
    poblacion : DataFrame with ``id``, ``Temp.`` and ``Humidity`` columns.
    c : colour argument forwarded unchanged to ``plt.scatter``.
    """
    # One row of means per id; both axes are read from the same grouped result.
    medias = poblacion.groupby("id")[["Temp.", "Humidity"]].mean()
    plt.scatter(medias["Temp."], medias["Humidity"], c=c)
    plt.xlabel("Grados ºC")
    plt.ylabel("% Humidity")

In [None]:
# Scatter colour for each experiment class
color = {'wine':'r', 'banana':'g', 'background':'b'}

# plotTempHum draws one point per id present in sensor_d, in ascending id order
# (groupby sorts its keys by default). Build the colour list from exactly those
# ids rather than deleting a hard-coded position from the full metadata list,
# so it stays aligned whichever experiments lack measurements.
ids_with_data = sensor_d["id"].unique()
sensor_m_with_data = sensor_m[sensor_m["id"].isin(ids_with_data)].sort_values("id")
sensor_m_color = [color[c] for c in sensor_m_with_data["class"]]

# Fail here, with a clear message, if an id in sensor_d has no metadata row.
assert len(sensor_m_color) == len(ids_with_data), \
    "every experiment id in sensor_d needs a matching row in sensor_m"

plotTempHum(sensor_d, sensor_m_color)