In [1]:
import pandas as pd
import numpy as np

## Definitions

In [2]:
def assign_curve_extra_pts(unified_info, course_name, curve_extra_pts, grade_to_get_pts):
    unified_courses_with_extra_pts = unified_info.loc[unified_info['Course Name'] == course_name]
    unified_courses_with_extra_pts['Extra Points'] = np.where(unified_courses_with_extra_pts['Grade'].between(0, grade_to_get_pts, inclusive=True), curve_extra_pts, 0)
    return unified_courses_with_extra_pts

In [3]:
def merging_df(dataframes_list):
    merged_df = pd.concat(dataframes_list)
    print(merged_df.shape)
    print(merged_df.tail(10))
    return merged_df

In [4]:
def xcolumnlookup(lookup_value, lookup_array, return_array, if_not_found:str = ''):
    match_value = return_array.loc[lookup_array == lookup_value]
    if match_value.empty:
        return "Not Found" if if_not_found == '' else if_not_found

    else:
        return match_value.tolist()[0]

## Import information

In [5]:
unified_courses_df = pd.read_csv("./_output/0_unified_courses_data.csv", header='infer')

In [6]:
output = {}
output['Columns'] = unified_courses_df.columns.values.tolist()
pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output)

Unnamed: 0,Columns
0,Student Name
1,Student Contact
2,Grade
3,Course Name


In [7]:
output = {}
output['Columns'] = unified_courses_df.dtypes
pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output)

Unnamed: 0,Columns
Student Name,object
Student Contact,object
Grade,int64
Course Name,object


In [8]:
unified_courses_df.head(10)

Unnamed: 0,Student Name,Student Contact,Grade,Course Name
0,Al,al@gmail.com,50,Math
1,Dominic,dominic@gmail.com,75,Math
2,Mark,mark@gmail.com,90,Math
3,Shane,shane@gmail.com,87,Math
4,Al,al@gmail.com,90,Biology
5,Dominic,dominic@gmail.com,90,Biology
6,Mark,mark@gmail.com,90,Biology
7,Shane,shane@gmail.com,95,Biology
8,Al,al@gmail.com,80,Chemistry
9,Dominic,dominic@gmail.com,56,Chemistry


In [9]:
unified_courses_df.shape

(12, 4)

## Calculations

In [10]:
math_with_extra_pts = assign_curve_extra_pts(unified_courses_df, "Math", 10, 70)
math_with_extra_pts.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Student Name,Student Contact,Grade,Course Name,Extra Points
0,Al,al@gmail.com,50,Math,10
1,Dominic,dominic@gmail.com,75,Math,0
2,Mark,mark@gmail.com,90,Math,0
3,Shane,shane@gmail.com,87,Math,0


In [11]:
chem_with_extra_pts = assign_curve_extra_pts(unified_courses_df, "Chemistry", 15, 60)
chem_with_extra_pts.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Student Name,Student Contact,Grade,Course Name,Extra Points
8,Al,al@gmail.com,80,Chemistry,0
9,Dominic,dominic@gmail.com,56,Chemistry,15
10,Mark,mark@gmail.com,58,Chemistry,15
11,Shane,shane@gmail.com,74,Chemistry,0


In [12]:
bio_with_extra_pts = assign_curve_extra_pts(unified_courses_df, "Biology", 5, 50)
bio_with_extra_pts.head(10)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  This is separate from the ipykernel package so we can avoid doing imports until


Unnamed: 0,Student Name,Student Contact,Grade,Course Name,Extra Points
4,Al,al@gmail.com,90,Biology,0
5,Dominic,dominic@gmail.com,90,Biology,0
6,Mark,mark@gmail.com,90,Biology,0
7,Shane,shane@gmail.com,95,Biology,0


## Merging Data

In [13]:
courses_with_extra = [bio_with_extra_pts, math_with_extra_pts, chem_with_extra_pts]

In [14]:
unified_extra_pts = merging_df(courses_with_extra)

(12, 5)
   Student Name     Student Contact  Grade Course Name  Extra Points
6   Mark          mark@gmail.com     90     Biology     0           
7   Shane         shane@gmail.com    95     Biology     0           
0   Al            al@gmail.com       50     Math        10          
1   Dominic       dominic@gmail.com  75     Math        0           
2   Mark          mark@gmail.com     90     Math        0           
3   Shane         shane@gmail.com    87     Math        0           
8   Al            al@gmail.com       80     Chemistry   0           
9   Dominic       dominic@gmail.com  56     Chemistry   15          
10  Mark          mark@gmail.com     58     Chemistry   15          
11  Shane         shane@gmail.com    74     Chemistry   0           


## Califications Summarize

In [15]:
unified_extra_pts.shape

(12, 5)

In [16]:
calif_sum = unified_extra_pts.groupby('Student Contact',as_index=False).agg({'Course Name':'first', 'Grade':'sum'})

In [17]:
calif_sum.shape

(4, 3)

In [18]:
calif_sum.head(3)

Unnamed: 0,Student Contact,Course Name,Grade
0,al@gmail.com,Biology,220
1,dominic@gmail.com,Biology,221
2,mark@gmail.com,Biology,238


In [19]:
### Adding extra points 
calif_sum['Grade + Extra'] = calif_sum['Grade'] + unified_extra_pts['Extra Points']

In [20]:
calif_sum.head(10)

Unnamed: 0,Student Contact,Course Name,Grade,Grade + Extra
0,al@gmail.com,Biology,220,230.0
1,dominic@gmail.com,Biology,221,221.0
2,mark@gmail.com,Biology,238,238.0
3,shane@gmail.com,Biology,256,256.0


In [21]:
unified_extra_pts.head(10)

Unnamed: 0,Student Name,Student Contact,Grade,Course Name,Extra Points
4,Al,al@gmail.com,90,Biology,0
5,Dominic,dominic@gmail.com,90,Biology,0
6,Mark,mark@gmail.com,90,Biology,0
7,Shane,shane@gmail.com,95,Biology,0
0,Al,al@gmail.com,50,Math,10
1,Dominic,dominic@gmail.com,75,Math,0
2,Mark,mark@gmail.com,90,Math,0
3,Shane,shane@gmail.com,87,Math,0
8,Al,al@gmail.com,80,Chemistry,0
9,Dominic,dominic@gmail.com,56,Chemistry,15


In [22]:
unified_extra_pts_no_duplicates = unified_extra_pts.copy()
unified_extra_pts_no_duplicates.shape

(12, 5)

In [23]:
unified_extra_pts_no_duplicates.drop(columns=['Grade', 'Course Name', 'Extra Points'],  axis=1, inplace=True)
unified_extra_pts_no_duplicates.shape

(12, 2)

In [24]:
unified_extra_pts_no_duplicates =  unified_extra_pts_no_duplicates.reset_index(drop=True)

In [25]:
unified_extra_pts_no_duplicates.head(10)

Unnamed: 0,Student Name,Student Contact
0,Al,al@gmail.com
1,Dominic,dominic@gmail.com
2,Mark,mark@gmail.com
3,Shane,shane@gmail.com
4,Al,al@gmail.com
5,Dominic,dominic@gmail.com
6,Mark,mark@gmail.com
7,Shane,shane@gmail.com
8,Al,al@gmail.com
9,Dominic,dominic@gmail.com


In [26]:
output = {}
output['Types'] = unified_extra_pts_no_duplicates.dtypes
pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output)

Unnamed: 0,Types
Student Name,object
Student Contact,object


In [27]:
output = {}
output['Columns'] = unified_extra_pts_no_duplicates.columns.values.tolist()
pd.set_option('display.max_colwidth', -1)
pd.DataFrame(data=output)

Unnamed: 0,Columns
0,Student Name
1,Student Contact


In [28]:
unified_extra_pts_no_duplicates.drop_duplicates(subset=['Student Name '])
unified_extra_pts_no_duplicates.shape

(12, 2)

In [29]:
unified_extra_pts_no_duplicates.head(10)

Unnamed: 0,Student Name,Student Contact
0,Al,al@gmail.com
1,Dominic,dominic@gmail.com
2,Mark,mark@gmail.com
3,Shane,shane@gmail.com
4,Al,al@gmail.com
5,Dominic,dominic@gmail.com
6,Mark,mark@gmail.com
7,Shane,shane@gmail.com
8,Al,al@gmail.com
9,Dominic,dominic@gmail.com


In [30]:
calif_sum['Student Name'] = calif_sum['Student Contact'].apply(xcolumnlookup, args= (unified_extra_pts['Student Contact'], unified_extra_pts['Student Name ']))

In [31]:
calif_sum.head(13)

Unnamed: 0,Student Contact,Course Name,Grade,Grade + Extra,Student Name
0,al@gmail.com,Biology,220,230.0,Al
1,dominic@gmail.com,Biology,221,221.0,Dominic
2,mark@gmail.com,Biology,238,238.0,Mark
3,shane@gmail.com,Biology,256,256.0,Shane


In [32]:
reordered_df = calif_sum[['Student Name','Student Contact', 'Course Name', 'Grade', 'Grade + Extra']]

In [33]:
reordered_df.head(15)

Unnamed: 0,Student Name,Student Contact,Course Name,Grade,Grade + Extra
0,Al,al@gmail.com,Biology,220,230.0
1,Dominic,dominic@gmail.com,Biology,221,221.0
2,Mark,mark@gmail.com,Biology,238,238.0
3,Shane,shane@gmail.com,Biology,256,256.0


## Exporting Data

In [34]:
reordered_df.to_csv(path_or_buf='./_output/sum_grades.csv', index=False)

## END