# Split HWs

This notebook will split the HW into 3 folds for evaluation.

In [1]:
import pandas as pd
import glob
from shutil import copyfile
import os
from pathlib import Path
import unicodedata

In [2]:
# Read file from Google Drive with all the students
all_students = pd.read_csv(
    "https://docs.google.com/spreadsheets/d/e/2PACX-1vSLYeRhFJli5CRpnh6mbVai14mHcY_zJAIjH-leLZWYPmAyApCqAX-Hm_FKoTjchEH_sOdk_EJFtt0r/pub?gid=1742589622&single=true&output=csv")
# Keep relevant columns
all_students = all_students.loc[:,['First name','Last name', 'ID number', 'Study Plan', 'Degree Code', 'Email address']]
all_students = all_students.drop_duplicates()
all_students['Full Name'] = all_students['Last name'] + ' ' + all_students['First name'] 
# remove apostrophe from full name
all_students['Full Name'] = all_students['Full Name'].str.replace("'", "")
# sort by full name
all_students = all_students.sort_values(by='Full Name')
all_students = all_students.reset_index(drop=True)
all_students

Unnamed: 0,First name,Last name,ID number,Study Plan,Degree Code,Email address,Full Name
0,OMAR AMGAD MOHAMED,ABDELHAMEED,2125337,SCP7079399;SCQ1098227;SCQ1098228;SCP7079402;SC...,SC2598,omaramgadmohamed.abdelhameed@studenti.unipd.it,ABDELHAMEED OMAR AMGAD MOHAMED
1,OKSANA,ABRAMOVA,2073541,SCQ3102344;SCP7078720;SCQ1097939;SCP7079219;SC...,SC2377,oksana.abramova@studenti.unipd.it,ABRAMOVA OKSANA
2,MD RUBAYET,AFSAN,2106016,SCQ1098250;SCQ1097939;SCQ0093643;SC01111799;SC...,SC2598,mdrubayet.afsan@studenti.unipd.it,AFSAN MD RUBAYET
3,FLAVIO,AGOSTINI,2126584,SCP7079197;SCQ3102344;SCP7079229;SCP7079405;SC...,SC2738,flavio.agostini.1@studenti.unipd.it,AGOSTINI FLAVIO
4,YAGO RUBEN,AGUADO CARRILLO DE ALBORNOZ,2124824,SCP7079406;SCQ0093964;SCQ3104483;SCQ0094083;SC...,SC2651,yagoruben.aguadocarrillodealbornoz@studenti.un...,AGUADO CARRILLO DE ALBORNOZ YAGO RUBEN
...,...,...,...,...,...,...,...
321,AUGUSTO CESARE,ZANELLATO,2122416,SCQ0089463;SCQ0089579;SCQ2101239;SCP9087619S;S...,SC2542,augustocesare.zanellato@studenti.unipd.it,ZANELLATO AUGUSTO CESARE
322,SIMONE,ZANETTI,2125656,SCP7079397;SCP8082660;SCP7078720;SCP7079319;SC...,SC2738,simone.zanetti.3@studenti.unipd.it,ZANETTI SIMONE
323,MARTINO,ZARATIN,2125047,SCP7079401;SCQ0093689;SCP7079319;SCP7079226;SC...,SC2738,martino.zaratin@studenti.unipd.it,ZARATIN MARTINO
324,NICCOLÒ,ZENARO,2125609,SCQ1098227;SC01111799;SCQ0093641;SCQ1098250;SC...,SC2598,niccolo.zenaro@studenti.unipd.it,ZENARO NICCOLÒ



## by Student Name

In [3]:
# Define folder with merged submissions from both channels
sub_folder = Path("./HW3_1sub").resolve()
print(sub_folder)

C:\Users\paolo\OneDrive - Università degli Studi di Padova\PhD\Courses\DeepLearning2023\autocorr\HW_splitter\HW3_1sub


In [4]:
# Extract Names from folders names
sub_names = []
for folder in sub_folder.iterdir():
    if folder.is_dir():
        name = folder.name.split("_")[0]
        # Take the .ipynb file inside the folder, rename it as name and move it to ../ipynb
        for file in folder.iterdir():
            if file.is_file() and file.suffix == '.ipynb':
                #print(f"Copy {file} to {sub_folder.parent / 'ipynb' / (name + '.ipynb')}")
                copyfile(file, sub_folder.parents[1] / 'ipynb' / (name + '.ipynb'))
        # save name        
        sub_names.append(name)
        # check whether name is in all_students or not
        if name not in all_students['Full Name'].values:
            print("Name {} is not present in all_students".format(name))

print("There are {} submisisons".format(len(sub_names)))

There are 180 submisisons


In [5]:
# check names where are present in all_students
submissions = all_students[all_students['Full Name'].isin(sub_names)]
submissions

Unnamed: 0,First name,Last name,ID number,Study Plan,Degree Code,Email address,Full Name
0,OMAR AMGAD MOHAMED,ABDELHAMEED,2125337,SCP7079399;SCQ1098227;SCQ1098228;SCP7079402;SC...,SC2598,omaramgadmohamed.abdelhameed@studenti.unipd.it,ABDELHAMEED OMAR AMGAD MOHAMED
2,MD RUBAYET,AFSAN,2106016,SCQ1098250;SCQ1097939;SCQ0093643;SC01111799;SC...,SC2598,mdrubayet.afsan@studenti.unipd.it,AFSAN MD RUBAYET
3,FLAVIO,AGOSTINI,2126584,SCP7079197;SCQ3102344;SCP7079229;SCP7079405;SC...,SC2738,flavio.agostini.1@studenti.unipd.it,AGOSTINI FLAVIO
5,ULASCAN,AKBULUT,2106046,SCP7079319;SCQ0089498;SCQ3102327;SCP7079226;SC...,SC2738,ulascan.akbulut@studenti.unipd.it,AKBULUT ULASCAN
9,AMJAD,ALI,2106878,SCQ3102343;SCP9087561;SCQ0089498;SCP7078720;SC...,SC2738,amjad.ali@studenti.unipd.it,ALI AMJAD
...,...,...,...,...,...,...,...
317,ALI,ZAMINI,2070871,SCQ1098227;SCP6076377;SCQ0089463;SCQ0089518;SC...,SC2542,ali.zamini@studenti.unipd.it,ZAMINI ALI
318,SARA,ZAMPELLI,2105691,SCP7079231;SCP8082660;SCP7079226;SCP7079319;SC...,SC2738,sara.zampelli@studenti.unipd.it,ZAMPELLI SARA
322,SIMONE,ZANETTI,2125656,SCP7079397;SCP8082660;SCP7078720;SCP7079319;SC...,SC2738,simone.zanetti.3@studenti.unipd.it,ZANETTI SIMONE
323,MARTINO,ZARATIN,2125047,SCP7079401;SCQ0093689;SCP7079319;SCP7079226;SC...,SC2738,martino.zaratin@studenti.unipd.it,ZARATIN MARTINO


In [6]:
# Look for duplicates
dup = {x for x in sub_names if sub_names.count(x) >1}
print(dup)

set()


In [7]:
# Split the dataframe in 3 blocks
from more_itertools import divide
sub_chunks = divide(3, submissions.index)
sub_chunks = [list(x) for x in sub_chunks]
print(f"Submisisons in 3 blocks of length: {[len(sub_chunks[i]) for i in range(3)]}")

Submisisons in 3 blocks of length: [60, 60, 60]


In [8]:
# Split among TAs
TAs = ['Flavio', 'Paolo', 'Uzair']
for i, ta in enumerate(TAs):
    submissions.loc[sub_chunks[i], "TA"] = ta
submissions

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submissions.loc[sub_chunks[i], "TA"] = ta


Unnamed: 0,First name,Last name,ID number,Study Plan,Degree Code,Email address,Full Name,TA
0,OMAR AMGAD MOHAMED,ABDELHAMEED,2125337,SCP7079399;SCQ1098227;SCQ1098228;SCP7079402;SC...,SC2598,omaramgadmohamed.abdelhameed@studenti.unipd.it,ABDELHAMEED OMAR AMGAD MOHAMED,Flavio
2,MD RUBAYET,AFSAN,2106016,SCQ1098250;SCQ1097939;SCQ0093643;SC01111799;SC...,SC2598,mdrubayet.afsan@studenti.unipd.it,AFSAN MD RUBAYET,Flavio
3,FLAVIO,AGOSTINI,2126584,SCP7079197;SCQ3102344;SCP7079229;SCP7079405;SC...,SC2738,flavio.agostini.1@studenti.unipd.it,AGOSTINI FLAVIO,Flavio
5,ULASCAN,AKBULUT,2106046,SCP7079319;SCQ0089498;SCQ3102327;SCP7079226;SC...,SC2738,ulascan.akbulut@studenti.unipd.it,AKBULUT ULASCAN,Flavio
9,AMJAD,ALI,2106878,SCQ3102343;SCP9087561;SCQ0089498;SCP7078720;SC...,SC2738,amjad.ali@studenti.unipd.it,ALI AMJAD,Flavio
...,...,...,...,...,...,...,...,...
317,ALI,ZAMINI,2070871,SCQ1098227;SCP6076377;SCQ0089463;SCQ0089518;SC...,SC2542,ali.zamini@studenti.unipd.it,ZAMINI ALI,Uzair
318,SARA,ZAMPELLI,2105691,SCP7079231;SCP8082660;SCP7079226;SCP7079319;SC...,SC2738,sara.zampelli@studenti.unipd.it,ZAMPELLI SARA,Uzair
322,SIMONE,ZANETTI,2125656,SCP7079397;SCP8082660;SCP7078720;SCP7079319;SC...,SC2738,simone.zanetti.3@studenti.unipd.it,ZANETTI SIMONE,Uzair
323,MARTINO,ZARATIN,2125047,SCP7079401;SCQ0093689;SCP7079319;SCP7079226;SC...,SC2738,martino.zaratin@studenti.unipd.it,ZARATIN MARTINO,Uzair


In [9]:
# Add columns
submissions.loc[:,"Final Grade"] = 0
submissions.loc[:,"Feedback"] = ""
# Export dataframe to excel
submissions.to_excel(sub_folder.stem + ".xlsx", index=False)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submissions.loc[:,"Final Grade"] = 0
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  submissions.loc[:,"Feedback"] = ""
