In [2]:
from __future__ import print_function
import argparse
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np
import pandas as pd
import csv
import math
import matplotlib as plt

In [13]:
batch_size=32
csv_flie="atlas-higgs-challenge-2014-v2.csv"
crossval_num=3
crossval=np.array(crossval_num)

In [14]:
class Dataset_Cust(torch.utils.data.Dataset):
    def __init__(self, csv_file, train=True, crossval_nbr=3, transformation=None):
        'Initialization'
        self.all=np.array(pd.read_csv(csv_file))
        #print(self.all[:,:-3])
        self.data=torch.from_numpy(self.all[:,0:-3].astype('float64'))
        self.train=train
        
        self.crossval_nbr=crossval_nbr
        self.testpart_num=crossval_nbr-1
        
        self.nbrpoint=len(self.all)
        #self.nbrtrain=int((1-1/crossval_nbr)*self.nbrpoint)
        
        self.Taille_section=int(self.nbrpoint/self.crossval_nbr)
        self.debut_test=self.testpart_num * self.Taille_section
        
        if self.testpart_num==self.crossval_nbr-1:
            self.fin_test=self.nbrpoint
        else:
            self.fin_test=self.debut_test + self.Taille_section
        
        self.nbrtest=self.fin_test - self.debut_test
        self.nbrtrain=self.nbrpoint - self.nbrtest
        #self.numtest=self.numpoint - self.numtrain
        
        self.transformation=transformation
        

    def __len__(self):
        'Denotes the total number of samples'
        if self.train:
            return self.nbrtrain
        else:
            return self.fin_test - self.debut_test
    

    def __getitem__(self, index):
        'Generates one sample of data'
        # Select sample
        if self.train:
            if index>= self.debut_test:
                index+=self.nbrtest
                
        else:
            index+=self.debut_test
        
        #sample=torch.from_numpy(self.all[index][0:-3].astype('float64'))
        sample=self.data[index]
        sample.requires_grad=True
        label=1 if self.all[index][-3]=="s" else 0
        
        if self.transformation :
            sample=self.transformation(sample)

        return sample.float(), float(label)
    
    def Set_Train(self, train):
        self.train=train
        
    def Set_TestPart_Num(self,num):
        self.testpart_num=num
        
        self.debut_test=self.testpart_num * self.Taille_section
        
        if self.testpart_num==self.crossval_nbr-1:
            self.fin_test=self.nbrpoint
        else:
            self.fin_test=self.debut_test + self.Taille_section
        
        self.nbrtest=self.fin_test - self.debut_test
        self.nbrtrain=self.nbrpoint - self.nbrtest

In [15]:
Dataset=Dataset_Cust(csv_flie)

In [16]:
Train_Loader=torch.utils.data.DataLoader(Dataset, batch_size=batch_size, shuffle=True)

In [17]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN,self).__init__()
        self.conv1=nn.Conv1d(1,10,5)
        self.conv2=nn.Conv1d(10,20,5)
        self.fc=nn.Linear(50*4*4,1)
        
    def forward(self, x):
        x=F.max_pool1d(F.relu(self.conv1(x)),2,2)
        x=F.max_pool1d(F.relu(self.conv2(x)),2,2)
        x=x.view(-1,50*4*4)
        x=self.fc(x)
        return F.sigmoid(x)

In [8]:
Cnn=CNN()

In [9]:
criterion= nn.BCELoss()
optimizeCnn= optim.SGD(Cnn.parameters(), lr=0.001, momentum=0.9)

In [10]:
def Train(Model, Optimizer, Criterion, Data_Loader,Epoch=10, log_interval=100):
    #Data_Loader.dataset.Set_TestPart_Num(4)
    print(Data_Loader.dataset.debut_test)
    predicted=np.empty(batch_size)
    #print(type(Percep.fc1.weight[0][0].item()))
    #print(type(Percep.fc1.bias[0].item()))
    init_weights(Model)
    
    Data_Loader.dataset.train=True
    
    for epoch in range(Epoch):
        ams=0
        TP=0 #True Positive
        FP=0 # False Positive
        S=0 #True Positive pondéré
        B=0 # False Positive pondéré
        Loss=0
        correct=0
    
        fp=0
        tp=0
        total=0
        running_loss=0
        for batch_id, sample in enumerate(Data_Loader,0):
            inputs, labels= sample
            Optimizer.zero_grad()
            
            #print(inputs[0][0].requires_grad)
            #print(labels.size(0))
            #print(inputs[0][0])
            
            
            outputs= Model(inputs[:,1:31])
        
             #print(outputs[0]+outputs[1])
             # print(outputs.view(-1,32))
            
            #predicted= torch.tensor(1 if outputs.data[j] > 0.5 else 0 for j in range(len(outputs.data)))
            total+= labels.size(0)
            #correct+= (predicted== labels).sum().item()
            for i in range(labels.size(0)):
                if outputs.data[i]>0.5:
                    predicted[i]=1
                    if labels[i]==1:
                        TP+=1
                        tp+=1
                        S+= inputs[i][31]
                        correct+=1
                    else:
                        FP+=1
                        fp+=1
                        B+=inputs[i][31]
                else:
                    predicted[i]=0
                    if labels[i]==0:
                        correct+=1
            
        
            #print(outputs)
            #outputs=outputs.view(-1,10)
            loss= Criterion(outputs, labels.float())
            loss.backward()
            Optimizer.step()
        
            running_loss+= loss.item()
            Loss+=loss.item()
            if batch_id%log_interval==0:
                if tp+fp>0:
                    print("epoch:{}, nombre batch:{} loss:{:.3f} precision={:.3f}%".format(epoch+1, batch_id+1, running_loss/log_interval, 100*tp/(tp+fp))) 
                else:
                    print("epoch:{}, nombre batch:{} loss:{:.3f} ".format(epoch+1, batch_id+1, running_loss/log_interval)) 

                total=0
                tp=0
                fp=0
                running_loss=0
        if TP+FP>0:
            print("Resultat Epoch {};  loss:{:.3f}, Accuracy={:.3f}  precision:{:.3f}%, AMS={:.4f}".format(epoch+1, Loss/(batch_id+1), 100*correct/len(Data_Loader.dataset), 100*TP/(TP+FP), AMS(S,B) ))
        else:
            print("Resultat Epoch {};  loss:{:.3f}, Accuracy={:.3f}  , AMS={:.4f}".format(epoch+1, Loss/(batch_id+1), 100*correct/len(Data_Loader.dataset), AMS(S,B) ))
        
    print("Résultat Entrainement: {} bonnes classification sur {} données :loss moyen/batch={:.4f}, Accuracy={:.3f},  precision={:.3f}%, AMS={:.4f}".format(correct,len(Data_Loader.dataset), Loss/(batch_id+1), 100*correct/len(Data_Loader.dataset) ,100*TP/(TP+FP), AMS(S,B)))
    return [Loss/(batch_id+1), 100*correct/len(Data_Loader.dataset), 100*TP/(TP+FP), AMS(S,B) ]
        

In [11]:
def Test(Model, Criterion, Data_Loader):
    #TEST Perceptron

    correct= 0
    total= 0
    test_loss = 0
    predicted=np.empty(batch_size)
    TP=0
    FP=0
    S=0
    B=0

    Data_Loader.dataset.train=False
    with torch.no_grad():
        for sample in Data_Loader:
            inputs, labels= sample
            #images=images.view(-1,784)
            outputs= Model(inputs[:,1:31])

            total+= labels.size(0)

            for i in range(labels.size(0)):
                    if outputs.data[i]>0.5:
                        predicted[i]=1

                        if labels[i]==1:
                            TP+=1
                            #tp+=1
                            S+= inputs[i][31]
                            correct+=1
                        else:
                            FP+=1
                            #fp+=1
                            B+=inputs[i][31]
                    else:
                        predicted[i]=0
                        if labels[i]==0:
                            correct+=1

            test_loss+=Criterion(outputs,labels.float())




    test_loss/= len(Data_Loader.dataset)

    print("Résultat test: {} bonnes classification sur {} données de test:loss moyen={:.8f} Accuracy={:.3f}%, Precision={:.3f}%, AMS={:.3f}".format(correct,total, test_loss/total,100*correct/total, 100*TP/(TP+FP), AMS(S,B)))
    
    return [test_loss/total, 100*correct/total, 100*TP/(TP+FP), AMS(S,B)]


In [12]:
#TRAIN CNN

Epoch=10

log_interval=100
predicted=np.empty(batch_size)
#print(type(Percep.fc1.weight[0][0].item()))
#print(type(Percep.fc1.bias[0].item()))
for epoch in range(Epoch):
    ams=0
    TP=0 #True Positive
    FP=0 # False Positive
    S=0 #True Positive pondéré
    B=0 # False Positive pondéré
    Loss=0
    correct=0
    
    fp=0
    tp=0
    total=0
    running_loss=0
    for batch_id, sample in enumerate(Train_Loader,0):
            inputs, labels= sample
            optimizeCnn.zero_grad()
            
            #print(inputs)
            #print(labels.size(0))
            
            
            
            outputs= Cnn(inputs[:,1:31])
            
            #print(outputs[0]+outputs[1])
           # print(outputs.view(-1,32))
        
            #predicted= torch.tensor(1 if outputs.data[j] > 0.5 else 0 for j in range(len(outputs.data)))
            total+= labels.size(0)
            #correct+= (predicted== labels).sum().item()
            for i in range(labels.size(0)):
                if outputs.data[i]>0.5:
                    predicted[i]=1
            
                    if labels[i]==1:
                        TP+=1
                        tp+=1
                        S+= inputs[i][31]
                        correct+=1
                    else:
                        FP+=1
                        fp+=1
                        B+=inputs[i][31]
                else:
                    predicted[i]=0
                    if labels[i]==0:
                        correct+=1
            
        
            #print(outputs)
            #outputs=outputs.view(-1,10)
            loss= criterion(outputs, labels.float())
            loss.backward()
            optimizeCnn.step()
        
            running_loss+= loss.item()
            Loss+=loss.item()
            if batch_id%log_interval==0:
                print("epoch:{}, nombre batch:{} loss:{:.3f} precision={:.3f}%".format(epoch+1, batch_id+1, running_loss/log_interval, 100*tp/(tp+fp))) 
                total=0
                tp=0
                fp=0
                running_loss=0
    print("Resultat Epoch {};  loss:{:.3f}, Accuracy={:.3f}%,  precision:{:.3f}%, AMS={:.4f}".format(epoch+1, Loss/(batch_id+1), 100*correct/len(Train_Loader.dataset), 100*TP/(TP+FP), AMS(S,B) ))
   
        
print("Résultat Entrainement: {} bonnes classification sur {} données :loss moyen/batch={:.4f}, Accuracy={:.3f}%,  precision={:.3f}%, AMS={:.4f}".format(correct,len(Train_Loader.dataset), Loss/(batch_id+1), 100*correct/len(Train_Loader.dataset) ,100*TP/(TP+FP), AMS(S,B)))

        

RuntimeError: Expected 3-dimensional input for 3-dimensional weight 10 1 5, but got 2-dimensional input of size [32, 30] instead