In [139]:
import pickle
import os
import pandas as pd
import numpy as np


In [140]:
# Locations of the pickled train / test splits, relative to the working directory.
train_file = "./input/extended_mnist_train.pkl"
test_file = "./input/extended_mnist_test.pkl"


def _load_pickle(path):
    """Open `path` in binary mode and return the object unpickled from it."""
    # NOTE(review): pickle.load can execute arbitrary code embedded in the file;
    # only load pickles that come from a trusted source.
    with open(path, "rb") as fp:
        return pickle.load(fp)


train = _load_pickle(train_file)
test = _load_pickle(test_file)

In [141]:
# Build the training matrix and the label vector.
# Every (image, label) pair in `train` contributes one flattened image row and one
# label; both are stored as NumPy arrays so later steps can use vectorised matrix
# operations.
train_data = np.array([image.flatten() for image, _ in train])
train_labels = np.array([label for _, label in train])

In [142]:

# Same flattening for the test split; the second element of each pair is ignored.
test_data = np.array([image.flatten() for image, _ in test])


# Uncomment to inspect the resulting array shapes:
#print(f"date train: {train_data.shape}")
#print(f"label train: {train_labels.shape}")
#print(f"date test: {test_data.shape}")

In [143]:
# Parameter initialisation for the linear classifier (weight matrix and bias).

np.random.seed(74)  # fixed seed so the initial weights are reproducible

features = 28 * 28  # one input per pixel of a 28x28 picture -> 784
clase = 10  # classes 0 through 9

# Small random weights (scaled by 0.01) keep the initial scores close to zero, so
# the exponentials taken inside softmax do not start out huge.
W = 0.01 * np.random.standard_normal((features, clase))
bias = np.zeros((clase,))  # shape (10,)

In [144]:
def softmax(scoruri):
    """Turn raw class scores into probabilities with a numerically stable softmax.

    Parameters
    ----------
    scoruri : array-like
        Either a 2-D array with one row of scores per sample, or a single 1-D
        score vector.

    Returns
    -------
    numpy.ndarray
        Array of the same shape as the input. For 2-D input each row sums to 1;
        for 1-D input the whole vector sums to 1.
    """
    scoruri = np.asarray(scoruri)
    # A lone score vector is normalised over its only axis; batched input keeps
    # the original behaviour of normalising along axis 1.
    axis = 0 if scoruri.ndim == 1 else 1

    # Subtracting the maximum leaves the result unchanged mathematically but
    # prevents overflow in exp() for large scores.
    exp_scoruri = np.exp(scoruri - np.max(scoruri, axis=axis, keepdims=True))
    return exp_scoruri / np.sum(exp_scoruri, axis=axis, keepdims=True)
    

def one_hot_encode(labels, clase):
    """Convert integer class labels into one-hot rows.

    Example: with ``clase=10`` the label ``3`` becomes
    ``[0, 0, 0, 1, 0, 0, 0, 0, 0, 0]``.

    Parameters
    ----------
    labels : array-like of int
        Class index for every sample; each value must lie in ``[0, clase)``.
    clase : int
        Number of classes, i.e. the width of every one-hot row.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(labels), clase)`` holding a 1 in the column
        of each sample's label and 0 elsewhere.

    Raises
    ------
    IndexError
        If any label is negative or not smaller than ``clase``.
    """
    labels = np.asarray(labels)  # accept plain Python sequences as well as arrays
    m = labels.shape[0]
    one_hot = np.zeros((m, clase))
    if m == 0:
        return one_hot

    # NumPy indexing would silently wrap negative labels around to the last
    # classes; reject them with the same exception type too-large labels raise.
    if np.any(labels < 0):
        raise IndexError("labels must be non-negative class indices")

    one_hot[np.arange(m), labels] = 1
    return one_hot

def cross_entropy_loss(y_pred, y_true):
    """Return the cross-entropy between predictions and targets, averaged per row.

    Parameters
    ----------
    y_pred : numpy.ndarray
        Model output (the softmax result), one row per sample.
    y_true : numpy.ndarray
        Encoded labels of the same samples, used to measure how close the
        prediction is to the truth.

    Returns
    -------
    float
        Negative sum of ``y_true * log(y_pred + 1e-10)`` divided by the number
        of rows in ``y_pred``.
    """
    n_samples = y_pred.shape[0]  # number of rows (samples) in this call

    # Tiny offset so that a predicted probability of exactly 0 never reaches log().
    eps = 1e-10
    log_probs = np.log(y_pred + eps)

    total = np.sum(y_true * log_probs)
    return -total / n_samples

def forward_propagation(X, W, bias):
    """Compute class probabilities for the inputs `X`.

    The scores are the product of `X` and `W` plus `bias`; they are then passed
    through `softmax` and the result is returned.
    """
    logits = np.dot(X, W)
    logits = logits + bias
    return softmax(logits)

def backward_propagation(X, y_pred, y_true, W, b, learning_rate):
    """Apply one update step to the weights and bias and return the new values.

    Parameters
    ----------
    X : numpy.ndarray
        Input rows of the current batch.
    y_pred : numpy.ndarray
        Predictions produced for `X`.
    y_true : numpy.ndarray
        Encoded targets for `X`.
    W, b : numpy.ndarray
        Current weights and bias; neither is modified in place.
    learning_rate : float
        Step size applied to both updates.

    Returns
    -------
    tuple
        ``(W_new, b_new)``.
    """
    n_samples = X.shape[0]

    # How far off the model was for every sample and class (target minus prediction).
    residual = y_true - y_pred

    # Batch-averaged correction for the weights: how much each input feature
    # contributed to the error. The bias correction is the averaged residual.
    weight_step = np.dot(X.T, residual) / n_samples
    bias_step = np.sum(residual, axis=0) / n_samples

    # Old value plus step size times step direction.
    return W + learning_rate * weight_step, b + learning_rate * bias_step

In [145]:
# Hyperparameters
learning_rate = 0.001
epoci = 150
batch_size = 64

# One-hot encode the training labels once, up front.
y_train_one_hot = one_hot_encode(train_labels, clase)

print("antrenarea perceptorului")
print("=" * 10)
for epoch in range(epoci):
    # Visit the samples in a fresh random order every epoch.
    indices = np.random.permutation(train_data.shape[0])
    X_shuffled, y_shuffled = train_data[indices], y_train_one_hot[indices]

    epoch_loss, n_batches = 0, 0

    for i in range(0, train_data.shape[0], batch_size):
        X_batch, y_batch = X_shuffled[i:i+batch_size], y_shuffled[i:i+batch_size]

        # Guess: run the model on the mini-batch.
        y_pred = forward_propagation(X_batch, W, bias)

        # Check: measure how wrong the guess was and keep a running total.
        batch_loss = cross_entropy_loss(y_pred, y_batch)
        epoch_loss += batch_loss
        n_batches += 1

        # Correct: replace the parameters with the updated ones.
        W, bias = backward_propagation(X_batch, y_pred, y_batch, W, bias, learning_rate)

    avg_loss = epoch_loss / n_batches

    # Everything below runs only on every 10th epoch.
    if (epoch + 1) % 10 != 0:
        continue

    # NOTE(review): the step size is *increased* by 0.0005 here, and the value
    # printed below is the already-raised rate, not the one used for the epochs
    # just finished. The output recorded in this notebook shows the loss staying
    # around 2.5 with fluctuating accuracy; consider a decaying schedule, and
    # confirm whether the inputs should be rescaled before training.
    learning_rate += 0.0005
    y_train_pred = forward_propagation(train_data, W, bias)
    train_predictions = np.argmax(y_train_pred, axis=1)
    train_accuracy = np.mean(train_predictions == train_labels)
    print(f"epoca {epoch+1}/{epoci} cu loss ul; {avg_loss:.4f} si accuracy: {train_accuracy:.4f} si learning rate {learning_rate}")

antrenarea perceptorului
epoca 10/150 cu loss ul; 2.4838 si accuracy: 0.9034 si learning rate 0.0015
epoca 20/150 cu loss ul; 2.5153 si accuracy: 0.8093 si learning rate 0.002
epoca 30/150 cu loss ul; 2.5516 si accuracy: 0.8794 si learning rate 0.0025
epoca 40/150 cu loss ul; 2.4869 si accuracy: 0.8253 si learning rate 0.003
epoca 50/150 cu loss ul; 2.5599 si accuracy: 0.8731 si learning rate 0.0035
epoca 60/150 cu loss ul; 2.5228 si accuracy: 0.8990 si learning rate 0.004
epoca 70/150 cu loss ul; 2.5041 si accuracy: 0.8943 si learning rate 0.0045000000000000005
epoca 80/150 cu loss ul; 2.4718 si accuracy: 0.8775 si learning rate 0.005000000000000001
epoca 90/150 cu loss ul; 2.4708 si accuracy: 0.8512 si learning rate 0.005500000000000001
epoca 100/150 cu loss ul; 2.5283 si accuracy: 0.8770 si learning rate 0.006000000000000002
epoca 110/150 cu loss ul; 2.4939 si accuracy: 0.8976 si learning rate 0.006500000000000002
epoca 120/150 cu loss ul; 2.4811 si accuracy: 0.8960 si learning rate

In [146]:
# Final accuracy on the training set with the trained parameters.
y_train_pred = forward_propagation(train_data, W, bias)
train_predictions = y_train_pred.argmax(axis=1)  # most probable class per row
train_accuracy = (train_predictions == train_labels).mean()
print(f"acuratetea finala {train_accuracy:.4f}")

# Predicted class for every test sample; show the first ten as a quick check.
y_test_pred = forward_propagation(test_data, W, bias)
test_predictions = y_test_pred.argmax(axis=1)
print(f"predictia {test_predictions[:10]}")

acuratetea finala 0.8495
predictia [9 4 3 0 2 1 2 9 5 5]


In [147]:
# Prepare the competition submission: one row per test sample, with the sample's
# position as "ID" and the predicted class as "target".
predictions_csv = {
    "ID": list(range(len(test_predictions))),
    "target": list(test_predictions),
}

df = pd.DataFrame(predictions_csv)
df.to_csv("submission.csv", index=False)
print("GATAAAAAAAAAAAAAAAAAAAAA")

GATAAAAAAAAAAAAAAAAAAAAA
