In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pytorch_lightning as pl
import seaborn as sns

import pandas as pd
from sklearn import cluster, datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler

import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset, TensorDataset, random_split

In [None]:
import torch.utils.data as data_utils

In [None]:
# Load the table from disk and give the verbose per-band columns short names
f = pd.read_csv("data.csv").rename(columns={
    'time sample band g': 'time_g',
    'time sample band r': 'time_r',
    'time sample band i': 'time_i',
    'total flux + noise band g': 'tfnbg',
    'total flux + noise band r': 'tfnbr',
    'total flux + noise band i': 'tfnbi',
})

In [None]:
# Display the freshly loaded (and renamed) table
f

In [None]:
# First 500 rows of the g-band flux, drawn as red dots against the row index
fig, ax = plt.subplots(figsize=(20, 10))
ax.plot(f['tfnbg'][:500], '.r')

In [None]:
# One sub-frame per value of `images` (1 to 4) ...
un, deux, trois, quatre = (f[f['images'] == n_images] for n_images in (1, 2, 3, 4))

# ... plus every row whose `images` value is anything other than 1
multi = f[f['images'] != 1]

In [None]:
# g-band flux of the images==1 rows (blue crosses) against the images==2 rows (faint red dots)
fig, ax = plt.subplots(figsize=(25, 15))
ax.plot(un['tfnbg'], '+b')
ax.plot(deux['tfnbg'], '.r', alpha=0.2)
#ax.plot(trois['tfnbg'], ':g', alpha = 0.4)
#ax.plot(quatre['tfnbg'], '-.y', alpha = 0.6)

In [None]:
# Leading g-band samples of each subset on one figure.
# The last subset is sliced to 1000 rows, the others to 500 (kept as in the original).
plt.figure(figsize=(25, 15))
for subset, n_rows, marker in ((un, 500, '+r'), (deux, 500, '.b'), (trois, 500, '.g'), (quatre, 1000, '.y')):
    plt.plot(subset['tfnbg'][:n_rows], marker, alpha=1)

# DATA MINING

In [None]:
# Keep only the label and the g-band time/flux columns
g = f.loc[:, ['images', 'time_g', 'tfnbg']]

In [None]:
# Scatter of g-band flux against time, one colour per value of `images`.
colors = {1: 'red', 2: 'blue', 3: 'green', 4: 'orange'}
# One opacity per class. The original passed the whole list [1, 0.1, 0.1, 0.1] to
# every call, which matplotlib rejects: alpha must be a scalar or have one entry per point.
alphas = {1: 1, 2: 0.1, 3: 0.1, 4: 0.1}
fig, ax = plt.subplots()
grouped = g.groupby('images')

for key, group in grouped:
    group.plot(ax=ax, kind='scatter', x='time_g', y='tfnbg', label=key,
               color=colors[key], alpha=alphas[key])

plt.show()

In [None]:
#!pip install plotly
import plotly
import plotly.express as px


# Pairwise scatter plots of the label and the three time/flux pairs.
features = ['images', 'time_g', 'tfnbg', 'time_r', 'tfnbr', 'time_i', 'tfnbi']

# Plot from the full table `f`: at this point `g` only holds 'images', 'time_g'
# and 'tfnbg', so requesting all seven dimensions from it would raise.
fig = px.scatter_matrix(
    f,
    dimensions=features)
fig.update_traces(diagonal_visible=False)
fig.show()

In [None]:
# r-band flux against g-band flux, for the images==1 rows and then the images==2 rows
ax = plt.gca()
ax.plot(un['tfnbg'], un['tfnbr'], '.')
ax.plot(deux['tfnbg'], deux['tfnbr'], '.')

In [None]:
def maximum(dataset, column):
    """Return the largest value of ``column`` within each ``ID`` group.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Frame holding an 'ID' column and ``column``.
    column : str
        Name of the column to take the maximum of.

    Returns
    -------
    list
        One maximum per distinct ID, in the order ``groupby('ID')`` yields the groups.
    """
    return [max(curve[column]) for _, curve in dataset.groupby('ID')]

In [None]:
# Per-ID peak flux for each image-count subset.
# NOTE(review): `un` is measured on 'tfnbg' while the other three use 'tfnbr' -
# confirm that mixing bands here is intended.
m1 = maximum(un, column='tfnbg')
m2, m3, m4 = (maximum(subset, column='tfnbr') for subset in (deux, trois, quatre))

# Report whether any images==3 ID peaks at exactly 0
if m3.count(0):
    print(True)

In [None]:
# Per-ID maxima of the four subsets overlaid on the same axes
ax = plt.gca()
ax.plot(m1, '.')
ax.plot(m2, '.')
ax.plot(m3, '.')
ax.plot(m4, '.')

In [None]:
# Display the raw table again before rescaling
f

In [None]:
def standard(dataset):
    """Rescale every group of rows sharing one ``ID`` independently.

    For each ``ID`` group:

    * the three flux columns ('tfnbg', 'tfnbr', 'tfnbi') are min-max scaled
      to [0, 1] using only that group's values;
    * each time column ('time_g', 'time_r', 'time_i') is shifted by its own
      minimum so that it starts at 0 within the group.

    Parameters
    ----------
    dataset : pandas.DataFrame
        Must contain 'ID', 'images' and the six time/flux columns. The first
        two columns are dropped by position (presumably 'ID' and 'images' -
        verify against the CSV layout); everything from the third column on
        is carried through.

    Returns
    -------
    pandas.DataFrame
        'images' followed by the carried-through columns, groups stacked in
        the order ``groupby('ID')`` yields them, original row index kept.
        An empty frame with those columns when ``dataset`` has no groups.
    """
    time_cols = ['time_g', 'time_r', 'time_i']
    flux_cols = ['tfnbg', 'tfnbr', 'tfnbi']

    scaler = MinMaxScaler(feature_range=(0, 1), copy=False)
    #scaler = StandardScaler()

    frames = []
    for _, group in dataset.groupby('ID'):
        data = group[group.columns[2:]].copy()

        # fit_transform refits on every call, so each group gets its own [0, 1] range
        data[flux_cols] = scaler.fit_transform(data[flux_cols])
        # Explicit column-wise minimum: np.min(DataFrame) returns a single scalar
        # on pandas >= 2, which would shift all three bands by the same offset.
        data[time_cols] = data[time_cols] - data[time_cols].min()

        frames.append(pd.concat([group['images'], data], axis=1))

    if not frames:
        return pd.DataFrame(columns=['images', *dataset.columns[2:]])

    # Single concat instead of growing the result inside the loop (which is
    # quadratic and relied on an ID equal to 0 being seen first).
    return pd.concat(frames)

In [None]:
# Per-ID rescaled copy of the raw table; note this rebinds `g`, which previously held a 3-column slice of `f`
g = standard(f)

In [None]:
# Display the rescaled table
g

In [None]:
# Pairwise scatter matrix over the label and the three time/flux pairs.
# NOTE(review): this plots the raw table `f`, not the rescaled `g` computed
# just above - confirm which one was meant.
features = ['images', 'time_g', 'tfnbg', 'time_r', 'tfnbr', 'time_i', 'tfnbi']

fig = px.scatter_matrix(f, dimensions=features)
fig.update_traces(diagonal_visible=False)
fig.show()

In [None]:
# Binary target: 0 when the raw `images` value is 1, 1 otherwise (2, 3 or 4).
# Derived from the untouched raw column in `f` (aligned on the row index) rather
# than from g['images'] itself, so re-running this cell does not remap the
# already-binarised 1s back to 0.
g['images'] = f.loc[g.index, 'images'].ne(1).astype(int)

In [None]:
# Model inputs: the columns sitting at positions 1 and 2 of `g`
feature_columns = list(g.columns[1:3])
feature_columns

In [None]:
# Positional split in blocks of 91 rows: first 8000 blocks for training,
# the next 1000 for validation, whatever remains for testing.
# presumably 91 is the number of rows per ID - verify against the data
train, val, test = g.iloc[:91*8000], g.iloc[91*8000:91*9000], g.iloc[91*9000:]

In [None]:
def dataframe_to_dataset(f, columns=None):
    """Wrap a frame as a ``TensorDataset`` of (features, target) rows.

    Parameters
    ----------
    f : pandas.DataFrame
        Frame containing an 'images' column (the target) and the feature columns.
    columns : sequence of str, optional
        Feature column names. Defaults to the notebook-level ``feature_columns``
        so existing single-argument calls behave as before.

    Returns
    -------
    torch.utils.data.TensorDataset
        Features keep the dtype of the underlying values; targets are int32.
    """
    if columns is None:
        # Backward-compatible default: fall back to the global defined earlier in the notebook
        columns = feature_columns
    targets = torch.tensor(f['images'].values, dtype=torch.int32)
    features = torch.tensor(f[list(columns)].values)
    return TensorDataset(features, targets)

In [None]:
# Tensor datasets for the three splits
TrainSet, TestSet, ValSet = map(dataframe_to_dataset, (train, test, val))

In [None]:
class SequenceModel(nn.Module):
    """Two-layer LSTM followed by a linear layer with a sigmoid output.

    Parameters
    ----------
    n_features : int
        Size of each input step (``input_size`` of the LSTM).
    n_classes : int
        The linear head emits ``n_classes - 1`` values.
    n_hidden : int, default 128
        Hidden size of the LSTM.
    """

    def __init__(self, n_features, n_classes, n_hidden=128):
        super().__init__()

        self.lstm = nn.LSTM(input_size=n_features, hidden_size=n_hidden, num_layers=2)

        head = [nn.Linear(n_hidden, n_classes - 1), nn.Sigmoid()]
        self.classifier = nn.Sequential(*head)

    def forward(self, x):
        """Run ``x`` through the LSTM and classify its last output step.

        The LSTM is built with the default ``batch_first=False``, so the
        first dimension of ``x`` is treated as the sequence axis and only
        the final entry along it reaches the classifier.
        """
        self.lstm.flatten_parameters()
        outputs, _ = self.lstm(x)
        last_step = outputs[-1]
        return self.classifier(last_step)
        

In [None]:
# Seed PyTorch's global RNG before the model is built in the next cell
torch.manual_seed(0)

In [None]:
# Two input features, two classes (so one sigmoid output), parameters cast to float32
model = SequenceModel(n_features=2, n_classes=2).float()

In [None]:
# Print the module tree of the model
print(model)

In [None]:
# Unshuffled loaders so rows keep their original order.
# Batch sizes are multiples of 91 rows: 36400 = 91 * 400 and 9100 = 91 * 100.
TrainLoader = DataLoader(TrainSet, batch_size=36400, shuffle=False)
ValLoader = DataLoader(ValSet, batch_size=9100, shuffle=False)
TestLoader = DataLoader(TestSet, batch_size=9100, shuffle=False)

In [None]:
# Binary cross-entropy on probabilities; the model's classifier ends in a Sigmoid
loss_function = torch.nn.BCELoss()

In [None]:
# Adam over all model parameters with a learning rate of 1e-4
optimizer = optim.Adam(model.parameters(), lr=0.0001)

In [None]:
# List every entry of the model's state_dict together with its tensor size
print("--- State_dict du model : ---")
for name, weights in model.state_dict().items():
    print(name, "\t", weights.size())
print()

def tensor(n):
    """Return a length-2 float tensor of zeros with position ``n`` set to 1."""
    one_hot = torch.zeros(2)
    one_hot[n] = 1
    return one_hot

In [None]:
losses = []
N_epochs = 3
# Rows per sequence. Matches the 91-row stride used for the train/val/test split
# and the loader batch sizes (36400 = 91 * 400).
# presumably one ID (one light curve) = 91 consecutive rows - verify against the data
SEQ_LEN = 91

model.train()
for epoch in range(N_epochs):  # Loop over epochs

    for features, label in TrainLoader:

        # The loader yields flat rows (n_rows, n_features). Previously the whole
        # batch was fed to the LSTM as ONE sequence and scored against label[0]
        # only. Regroup it into individual sequences, laid out as
        # (SEQ_LEN, n_sequences, n_features) because the LSTM uses batch_first=False.
        # reshape raises if the batch is not a whole number of sequences.
        sequences = (
            features.float()
            .reshape(-1, SEQ_LEN, features.shape[-1])
            .transpose(0, 1)
            .contiguous()
        )
        # One target per sequence, shaped (n_sequences, 1) like the model output.
        # assumes the label is constant within a sequence, so its first row is representative
        targets = label.reshape(-1, SEQ_LEN)[:, 0].float().unsqueeze(1)

        # Forward propagation
        labels_pred = model(sequences)

        # Loss computation
        loss = loss_function(labels_pred, targets)

        # Save loss for future analysis
        losses.append(loss.item())

        # Erase previous gradients
        optimizer.zero_grad()

        # Compute gradients (backpropagation)
        loss.backward()

        # Weight update
        optimizer.step()

# Printed once, after the last epoch (it used to fire after every epoch)
print('Training done')

In [None]:
val_losses = []
# Rows per sequence. Matches the 91-row stride used for the train/val/test split
# and the validation batch size (9100 = 91 * 100).
# presumably one ID (one light curve) = 91 consecutive rows - verify against the data
SEQ_LEN = 91

# A single pass is enough: the model is not updated here, so looping over
# epochs only repeated identical numbers.
model.eval()
with torch.no_grad():  # no gradients needed for evaluation
    for features, labels in ValLoader:

        # Regroup flat rows into (SEQ_LEN, n_sequences, n_features) sequences
        # instead of treating the whole batch as one sequence labelled by labels[0].
        sequences = (
            features.float()
            .reshape(-1, SEQ_LEN, features.shape[-1])
            .transpose(0, 1)
            .contiguous()
        )
        # assumes the label is constant within a sequence, so its first row is representative
        targets = labels.reshape(-1, SEQ_LEN)[:, 0].float().unsqueeze(1)

        # Forward propagation
        labels_pred = model(sequences)

        # Loss computation
        loss = loss_function(labels_pred, targets)

        # Save loss for future analysis
        val_losses.append(loss.item())

print('Validation done')

In [None]:
# Training loss recorded at every optimisation step
fig, axes = plt.subplots(figsize=(8, 6))
axes.plot(losses, 'r-', lw=2, label='Training loss function')
#axes.plot(val_losses,'b-',lw=2,label='Validation loss function')
axes.set_xlabel('N iterations', fontsize=18)
axes.set_ylabel('Loss', fontsize=18)
axes.legend(loc='upper right', fontsize=16)