## Problem Statement

- DESCRIPTION : A university wants to categorize the marks scored by the students of each batch in the quizzes conducted.

In [1]:
# importing the libraries

import numpy as np
import pandas as pd
import glob, os

In [2]:
# Load the master student list from 'studentlist.csv' into `data`
# and preview its first five rows.

data = pd.read_csv(filepath_or_buffer='studentlist.csv')
data.head(n=5)

Unnamed: 0,enrollmentnumber,admn,studentname,studentEmail
0,1613101016,16SCSE101757,AAYUSH TYAGI,
1,1613101026,16SCSE101415,ABHISHEK,
2,1613101051,16SCSE101222,ADITI SINGH,
3,1613101054,16SCSE101220,ADITYA MANKAR,
4,1613101079,16SCSE101085,AKASH KANSAL,


In [3]:
# print the column names, dtypes and non-null counts of the student list
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1017 entries, 0 to 1016
Data columns (total 4 columns):
enrollmentnumber    1017 non-null int64
admn                1017 non-null object
studentname         1017 non-null object
studentEmail        0 non-null float64
dtypes: float64(1), int64(1), object(2)
memory usage: 31.9+ KB


**There are 1017 rows and 4 columns in the dataframe 'data'. The column 'studentEmail' does not contain a single non-null value.**

In [4]:
# 'studentEmail' holds no values at all, so remove that column
# and re-inspect the remaining columns

data = data.drop('studentEmail', axis='columns')
data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1017 entries, 0 to 1016
Data columns (total 3 columns):
enrollmentnumber    1017 non-null int64
admn                1017 non-null object
studentname         1017 non-null object
dtypes: int64(1), object(2)
memory usage: 24.0+ KB


In [5]:
# Read every per-batch CSV in the 'batchwiselist' folder and stack them into one dataframe.
# NOTE(review): the folder is an absolute, machine-specific path — consider making it
# relative to the notebook so the cell runs on other machines.

batchwiselist = 'C:/Users/Indranil/batchwiselist'

# glob.glob returns matches in arbitrary order; sorting keeps the row order of `frame`
# (and therefore the preview below) reproducible across machines.
files = sorted(glob.glob(batchwiselist + '/*.csv'))
if not files:
    # fail with a clear message instead of pd.concat's "No objects to concatenate"
    raise FileNotFoundError('no .csv files found in ' + batchwiselist)

batch_lst = []

for file in files:
    # 'srNo' becomes the index; it restarts in every batch file, so the index of the
    # combined frame is not unique
    df = pd.read_csv(file, index_col='srNo')
    batch_lst.append(df)

frame = pd.concat(batch_lst)


# Also dropping 'Unnamed: 0' column as it holds no value to dataframe since we have srNo column
frame = frame.drop(columns=['Unnamed: 0'])
frame.head()

Unnamed: 0_level_0,enrollmentNumber,admissionNumber,studentName
srNo,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
1,1613101016,16SCSE101757,AAYUSH TYAGI
2,1613101026,16SCSE101415,ABHISHEK
3,1613101051,16SCSE101222,ADITI SINGH
4,1613101054,16SCSE101220,ADITYA MANKAR
5,1613101079,16SCSE101085,AKASH KANSAL


In [6]:
# print the column names, dtypes and non-null counts of the combined batch list
frame.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1017 entries, 1 to 43
Data columns (total 3 columns):
enrollmentNumber    1017 non-null int64
admissionNumber     1017 non-null object
studentName         1017 non-null object
dtypes: int64(1), object(2)
memory usage: 31.8+ KB


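**After combining all the files in the 'batchwiselist' folder into one dataframe, we see that it holds the same three fields as 'studentlist' (enrollment number, admission number and student name), although the column names are spelled differently: 'enrollmentNumber', 'admissionNumber' and 'studentName'.**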

In [7]:
# (number of rows, number of columns) of the combined batch list
frame.shape

(1017, 3)

In [8]:
# Read the quiz result CSVs from the 'quiz' folder.
# NOTE(review): the folder is an absolute, machine-specific path — consider making it
# relative to the notebook so the cell runs on other machines.

quiz = 'C:/Users/Indranil/quiz'

# glob.glob returns matches in arbitrary order. Quiz_1 / Quiz_2 are picked by position
# below, so sort the file names to make that assignment deterministic.
# NOTE(review): this assumes the first quiz's file name sorts before the second's — confirm.
files1 = sorted(glob.glob(quiz + '/*.csv'))
if len(files1) < 2:
    # fail with a clear message instead of a bare IndexError further down
    raise FileNotFoundError('expected at least two quiz .csv files in ' + quiz
                            + ', found ' + str(len(files1)))

quiz_lst = []

for file in files1:
    df1 = pd.read_csv(file)
    quiz_lst.append(df1)

# assigning quiz1 & quiz2 data to variable Quiz_1 & Quiz_2
Quiz_1 = quiz_lst[0]
Quiz_2 = quiz_lst[1]

# dropping the columns which have only null values ID number, Institution, Department, Email address
empty_columns = ['ID number', 'Institution', 'Department', 'Email address']
Quiz_1 = Quiz_1.drop(columns=empty_columns)
Quiz_2 = Quiz_2.drop(columns=empty_columns)



In [9]:
# Build the empty 'statistics' table: one row per quiz, one column per statistic.
# No data is passed in, so every cell starts out as NaN.

statistics = pd.DataFrame(
    index=['Quiz 1', 'Quiz 2'],
    columns=[
        "no of present",
        "less than 50",
        "between 50 and 60",
        "between 60 and 70",
        "between 70 and 80",
        "greater than 80",
    ],
)
statistics

Unnamed: 0,no of present,less than 50,between 50 and 60,between 60 and 70,between 70 and 80,greater than 80
Quiz 1,,,,,,
Quiz 2,,,,,,


**The dataframe statistics has been created**

In [10]:
# dropping the rows of 'Quiz1' & 'Quiz2' that have no 'Firstname'
#
# Assigning `Series.dropna()` back to the same column does nothing: the assignment
# re-aligns on the frame's index and the dropped positions come back as NaN.
# The rows therefore have to be removed from the frame itself.
# `.copy()` hands later cells an independent frame to assign columns on.

Quiz_1 = Quiz_1.dropna(subset=['Firstname']).copy()
Quiz_2 = Quiz_2.dropna(subset=['Firstname']).copy()

In [11]:
# 'Grade/10.00' column in both the quiz has '-' present in a very small number of rows;
# those rows are scored as 0 and the whole column is then converted to a numeric dtype.
#
# Only cells that consist solely of '-' (optionally padded with spaces) are replaced, so a
# '-' inside a real value is never rewritten. `Series.replace` also works on an already
# numeric column, so this cell can be re-run safely (the `.str` accessor raises there).
# `pd.to_numeric` is applied to the whole column at once rather than element by element.

for quiz_df in (Quiz_1, Quiz_2):
    quiz_df['Grade/10.00'] = pd.to_numeric(
        quiz_df['Grade/10.00'].replace(r'^\s*-\s*$', '0', regex=True)
    )


**Now we have 'Quiz_1' & 'Quiz_2' as ready dataframes for grouping the students by the scores they obtained in each quiz**

#### Filling the dataframe 'statistics' with the required data from 'Quiz_1' & 'Quiz_2' 

In [12]:
def _count_in_band(quiz_df, lower, upper, include_upper=False):
    """Count students of one quiz whose grade lies in a score band.

    Parameters
    ----------
    quiz_df : DataFrame with a numeric 'Grade/10.00' column and a 'Firstname' column.
    lower, upper : band limits on the 0-10 grade scale.
    include_upper : if False the band is [lower, upper); if True it is [lower, upper].

    Returns
    -------
    Number of rows in the band that have a non-null 'Firstname'.
    """
    grades = quiz_df['Grade/10.00']
    if include_upper:
        in_band = (grades >= lower) & (grades <= upper)
    else:
        in_band = (grades >= lower) & (grades < upper)
    return quiz_df.loc[in_band, 'Firstname'].count()


# (statistics column, lower limit, upper limit, upper limit included?)
# Bands are half-open so that they tile the 0-10 scale without gaps: the previous limits
# (0-4.9, 5-5.9, ...) left grades such as 4.95 or 5.95 uncounted. Plain comparisons are used
# instead of `between(..., inclusive=True)`, whose boolean flag is rejected by pandas 2.x.
score_bands = [
    ('less than 50',      0,  5, False),
    ('between 50 and 60', 5,  6, False),
    ('between 60 and 70', 6,  7, False),
    ('between 70 and 80', 7,  8, False),
    ('greater than 80',   8, 10, True),   # top band keeps the maximum grade of 10
]

# students with a non-null 'Firstname' are counted as present
statistics['no of present'] = [Quiz_1['Firstname'].count(), Quiz_2['Firstname'].count()]

for band_column, lower, upper, include_upper in score_bands:
    statistics[band_column] = [
        _count_in_band(Quiz_1, lower, upper, include_upper),
        _count_in_band(Quiz_2, lower, upper, include_upper),
    ]

In [13]:
# display the filled-in statistics table
statistics

Unnamed: 0,no of present,less than 50,between 50 and 60,between 60 and 70,between 70 and 80,greater than 80
Quiz 1,659,127,69,57,58,348
Quiz 2,680,93,40,39,41,467
