# Given Questions #1
### In this notebook we will answer some important questions about the dataset

### First, let's import the needed libraries and the dataset:

In [137]:
import pandas as pd
import os
import sys
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plot
import cufflinks

# Standard plotly imports
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.figure_factory as ff

from plotly.offline import iplot, init_notebook_mode

# Make the parent directory importable so project modules can be loaded
# from this notebook.
module_path = os.path.abspath(os.pardir)
if module_path not in sys.path:
    sys.path.append(module_path)

# Switch both cufflinks and plotly to offline (in-notebook) rendering.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [58]:
# Directory holding the exported CSV files. It defaults to the original
# author's location; set the PROGSENSE_DATA_DIR environment variable to run
# the notebook on another machine without editing this cell.
DATA_DIR = os.environ.get(
    'PROGSENSE_DATA_DIR',
    '/Users/noresources/Dropbox/_git_projects/ProgSense/data/csv')

# All three exports are semicolon-delimited. low_memory=False makes pandas
# parse each file in one pass instead of in chunks.
# Course records of graduates
work_graduates = pd.read_csv(
    os.path.join(DATA_DIR, 'acwork_graduates.csv'), sep=';', low_memory=False)

# Course records of students
work_students = pd.read_csv(
    os.path.join(DATA_DIR, 'acwork_students.csv'), sep=';', low_memory=False)

# Per-student information (status, gender, ...)
info_students = pd.read_csv(
    os.path.join(DATA_DIR, 'students_20190525.csv'), sep=';', low_memory=False)

## Question 1: Statistical distribution of graduation degree
### We will plot the distribution of grades for all graduates:

### Since we can't (yet) calculate the graduation degree, we will just plot the probability distribution of all the course grades



In [55]:
# Pull the grade-symbol column of the graduates frame out as a plain list
total_grades = work_graduates.loc[:, "Σύμβολο βαθμού"].tolist()

In [57]:
def prep_list(col, min_grade=5.0):
    """
    Turn a column of raw grade symbols into a list of passing grades.

    String entries are parsed as numbers written with a decimal comma
    (e.g. "7,5" -> 7.5). The marker "NS" and missing entries (None, NaN,
    blank strings) are dropped, and so is every grade below ``min_grade``.
    Non-string entries are kept as they are (no type conversion).

    Parameters
    ----------
    col : iterable
        Raw grade values, e.g. a pandas Series or a list of str/float.
    min_grade : float, optional
        Lowest grade to keep. Defaults to 5.0, the cut-off originally
        hard-coded for failing grades.

    Returns
    -------
    list
        The grades that are >= ``min_grade``, in their original order.

    Raises
    ------
    ValueError
        If a string entry is neither blank, "NS", nor parseable as a number.
    """
    grades = []
    for value in col:
        if value is None:
            continue
        if isinstance(value, str):
            text = value.strip()
            # Blank cells and the "NS" marker carry no grade
            if not text or text == "NS":
                continue
            value = float(text.replace(',', '.'))
        # NaN compares False against any number, so missing values that
        # pandas represents as NaN are discarded by this check as well.
        if value >= min_grade:
            grades.append(value)
    return grades


# Passing grades (>= 5.0) taken from the grade column of all graduate records
grade_list = prep_list(total_grades)

In [10]:
# Histogram (0.5-wide bins) of every passing graduate grade
fig = ff.create_distplot(
    hist_data=[grade_list],
    group_labels=['Graduate Course Grade Distribution'],
    bin_size=.5,
)
# Interactive rendering is left disabled; the image below shows the result.
# iplot(fig, filename='Basic Distplot')

![Prob_distr](https://i.imgur.com/ubYhS9bl.png)

In [44]:
# Let's look at a single course only.
# NOTE(review): the course name looks truncated ('... ΣΥΣΤΗΜΑΤ'); presumably
# that is how it appears in the exported data - confirm before "fixing" it.
course_name = 'ΕΙΣΑΓΩΓΗ ΣΤΗ ΘΕΩΡΙΑ ΣΗΜΑΤΩΝ ΚΑΙ ΣΥΣΤΗΜΑΤ'

# Boolean mask selecting the rows of that course, then its passing grades
is_course = work_graduates['Μάθημα (Περιγραφή)'] == course_name
grade_list = prep_list(work_graduates.loc[is_course, "Σύμβολο βαθμού"])

fig = ff.create_distplot([grade_list], [course_name], bin_size=.5)
iplot(fig, filename='Basic Distplot')

## Let's plot multiple courses:

In [53]:
# Courses to compare. Each name is used both as the filter value for the
# 'Μάθημα (Περιγραφή)' column and as the legend label, so the two can never
# get out of sync.
grade_names = ['ΕΙΣΑΓΩΓΗ ΣΤΗ ΘΕΩΡΙΑ ΣΗΜΑΤΩΝ ΚΑΙ ΣΥΣΤΗΜΑΤ', 'ΑΡΧΙΤΕΚΤΟΝΙΚΗ ΥΠΟΛΟΓΙΣΤΩΝ',
               'ΔΙΚΤΥΑ ΥΠΟΛΟΓΙΣΤΩΝ', 'ΕΠΙΣΤΗΜΟΝΙΚΟΣ ΥΠΟΛΟΓΙΣΜΟΣ Ι', 'ΤΕΧΝΟΛΟΓΙΑ ΛΟΓΙΣΜΙΚΟΥ']

# One list of passing grades per course, in the same order as grade_names
grade_list = [
    prep_list(work_graduates.loc[
        work_graduates['Μάθημα (Περιγραφή)'] == course, "Σύμβολο βαθμού"])
    for course in grade_names
]

fig = ff.create_distplot(grade_list, grade_names, bin_size=.5)
iplot(fig, filename='Basic Distplot')

### Question 8: Gender-based grade distribution

In [131]:
# Keep only the students whose status is 'Απόφοιτος' (graduate)
grads = info_students[info_students['Κατάσταση'] == 'Απόφοιτος']

# Gender is coded in this column as 1 = male, 2 = female (per its header).
# The header text is a data key and must be reproduced exactly as exported.
gender_column = 'Φύλο (1: Άρρεν, 2: Θύλη)'

# For males: graduate rows -> their IDs -> their course records
grad_males = grads[grads[gender_column] == 1]
id_males = grad_males['ID'].tolist()
grades_males = work_graduates[work_graduates['ID'].isin(id_males)]

# For females: same steps, with the IDs also held as a plain list so both
# groups are handled identically
grad_females = grads[grads[gender_column] == 2]
id_females = grad_females['ID'].tolist()
grades_females = work_graduates[work_graduates['ID'].isin(id_females)]

77940

In [136]:
# Passing grades per gender, ordered to match the legend labels
grade_names = ['Males', 'Females']
grade_list = [
    prep_list(records['Σύμβολο βαθμού'])
    for records in (grades_males, grades_females)
]

fig = ff.create_distplot(
    hist_data=grade_list,
    group_labels=grade_names,
    bin_size=.5,
    show_rug=True,
)
# Interactive rendering is left disabled; the image below shows the result.
# iplot(fig, filename='Basic Distplot')

![Prob_distr](https://i.imgur.com/eyp7XoI.png)