In [1]:
import pandas as pd

In [3]:
def create_user_training_summary():
    df = pd.read_csv('../prep/PREP_TrainingPerformances.csv')

    # Grouping by employeeID and calculating aggregated stats
    user_summary = df.groupby(['employeeID', 'name']).agg(
        total_courses_taken=('courseID', 'nunique'),
        total_attempts=('attempt', 'sum'),
        average_score=('score', 'mean'),
        total_time_spent=('time', 'sum')
    ).reset_index()

    user_summary.to_csv('REPORT_User_Training_Summary.csv', index=False)
    print(user_summary)
create_user_training_summary()

   employeeID    name  total_courses_taken  total_attempts  average_score  \
0           1  Aswath                    4               8      43.333333   

   total_time_spent  
0             100.0  


In [4]:
def create_course_performance_report():
    df = pd.read_csv('../prep/PREP_TrainingPerformances.csv')

    # Grouping by courseID and course title
    course_performance = df.groupby(['courseID', 'course']).agg(
        total_participants=('employeeID', 'nunique'),
        average_score=('score', 'mean'),
        average_time_spent=('time', 'mean'),
        total_attempts=('attempt', 'sum')
    ).reset_index()

    course_performance.to_csv('REPORT_Course_Performance.csv', index=False)
    print(course_performance)

create_course_performance_report()

   courseID        course  total_participants  average_score  \
0       117            QA                   1           40.0   
1       119          Java                   1           70.0   
2       127    Javascript                   1           20.0   
3       135  Data Science                   1           30.0   

   average_time_spent  total_attempts  
0                10.0               1  
1                30.0               3  
2                 9.0               1  
3                10.5               3  


In [5]:
def create_manager_feedback_summary():
    df = pd.read_csv('../prep/PREP_Feedbacks.csv')

    # Grouping by managerId
    feedback_summary = df.groupby(['managerId']).agg(
        total_employees=('employeeID', 'nunique'),
        average_feedback_score=('aggregatedScore', 'mean')
    ).reset_index()

    feedback_summary.to_csv('REPORT_Manager_Feedback_Summary.csv', index=False)
    print(feedback_summary)
create_manager_feedback_summary()

  managerId  total_employees  average_feedback_score
0    JMD364                4                    40.0


In [7]:
def create_course_participation_by_designation():
    df = pd.read_csv('../prep/PREP_TrainingPerformances.csv')
    users_df = pd.read_csv('../prep/PREP_Users.csv')

    # Merge user data with training performance data using different column names
    merged_df = pd.merge(df, users_df, left_on='employeeID', right_on='id')

    print(merged_df)

    # Grouping by course and designation
    participation_by_designation = merged_df.groupby(['course', 'designation']).agg(
        total_participants=('employeeID_x', 'nunique'),
        average_score=('score', 'mean')
    ).reset_index()

    participation_by_designation.to_csv('REPORT_Course_Participation_by_Designation.csv', index=False)
    print(participation_by_designation)
create_course_participation_by_designation()


   Unnamed: 0_x  id_x  employeeID_x  name_x  courseID        course  score  \
0             0     4             1  Aswath       119          Java   80.0   
1             1     5             1  Aswath       135  Data Science   40.0   
2             2     6             1  Aswath       119          Java   60.0   
3             3     7             1  Aswath       117            QA   40.0   
4             4     8             1  Aswath       135  Data Science   20.0   
5             5     9             1  Aswath       127    Javascript   20.0   

   time  attempt                         createdAt  \
0  31.0        1  2024-10-03 08:19:14.878000+00:00   
1   5.0        1  2024-10-03 11:05:47.457000+00:00   
2  29.0        2  2024-10-04 05:28:58.807000+00:00   
3  10.0        1  2024-10-04 11:42:03.314000+00:00   
4  16.0        2  2024-10-04 13:09:37.540000+00:00   
5   9.0        1  2024-10-04 13:09:57.968000+00:00   

                          updatedAt  Unnamed: 0_y  id_y  name_y  \
0  2024

In [8]:
def create_training_efficiency_report():
    df = pd.read_csv('../prep/PREP_TrainingPerformances.csv')

    # Group by course and calculate average score and total time spent
    training_efficiency = df.groupby('course').agg(
        total_time_spent=('time', 'sum'),
        average_score=('score', 'mean')
    ).reset_index()

    training_efficiency.to_csv('REPORT_Training_Efficiency.csv', index=False)
    print(training_efficiency)
create_training_efficiency_report()


         course  total_time_spent  average_score
0  Data Science              21.0           30.0
1          Java              60.0           70.0
2    Javascript               9.0           20.0
3            QA              10.0           40.0


In [14]:
def create_training_attempt_analysis():
    df = pd.read_csv('../prep/PREP_TrainingPerformances.csv')

    # Group by employee and course to analyze attempts
    attempt_analysis = df.groupby(['employeeID', 'courseID']).agg(
        total_attempts=('attempt', 'sum'),
        final_score=('score', 'max')
    ).reset_index()

    attempt_analysis.to_csv('REPORT_Training_Attempt_Analysis.csv', index=False)
    print(attempt_analysis)
create_training_attempt_analysis()


   employeeID  courseID  total_attempts  final_score
0           1       117               1         40.0
1           1       119               3         80.0
2           1       127               1         20.0
3           1       135               3         40.0
