# Q1: Processing Data

In [17]:
# The raw input data from the case study 
students_data = [
    "id:101, name: Ali, dept: CS, attendance:88, marks: [80,75,90], feedback:Excellent",
    "id:102, name:Sara, dept: IT, attendance:72, marks: [60,65,58], feedback:Helpful",
    "id:103, name: Hamza, dept: CS, attendance:95, marks: [90,92,88], feedback: Very Good",
    "id:104, name: Maryam, dept: Math, attendance:67, marks: [55,60,50], feedback:Average",
    "id:105,name: Usman, dept: IT, attendance: 85, marks: [70,68,72], feedback: Helpful"
]

parsed_students = []

print("--- Q1: Processing Data ---")

for record in students_data:
    try:
        # Step A: Split the long string by commas to get individual fields
        # We assume the delimiter is a comma followed by a space ", " or just ","
        parts = record.split(',') 
        
        student_info = {}
        
        for part in parts:
            # Split each part by ':' to get key and value (e.g., "id" and "101")
            if ':' in part:
                key, value = part.split(':', 1) # Split only on the first colon
                key = key.strip() # Remove extra spaces
                value = value.strip()
                
                # Step B: Convert specific data types [cite: 14]
                if key == 'id':
                    student_info['id'] = int(value)
                elif key == 'attendance':
                    student_info['attendance'] = int(value)
                elif key == 'marks':
                    # Clean the brackets [] and split numbers by comma
                    clean_marks = value.strip('[]')
                    # Convert string numbers "80" to integer 80
                    student_info['marks'] = [int(m) for m in clean_marks.split(',')]
                else:
                    # Keep name, dept, feedback as strings
                    student_info[key] = value
        
        parsed_students.append(student_info)
        
    except ValueError:
        # Step C: Error handling [cite: 15]
        print(f"Error processing record: {record}")

# Let's see the clean data
print(f"Successfully processed {len(parsed_students)} students.")

--- Q1: Processing Data ---
Successfully processed 5 students.


# Q2: Department Attendance

In [18]:
print("\n--- Q2: Department Attendance ---")
dept_attendance = {} # Key: Dept name, Value: List of attendance numbers

for s in parsed_students:
    dept = s['dept']
    att = s['attendance']
    
    if dept not in dept_attendance:
        dept_attendance[dept] = []
    dept_attendance[dept].append(att)

for dept, att_list in dept_attendance.items():
    avg_att = sum(att_list) / len(att_list)
    print(f"{dept}: {avg_att:.1f}%")


--- Q2: Department Attendance ---
CS: 91.5%
IT: 78.5%
Math: 67.0%


# Q3: Low Attendance Identification

In [19]:
print("\n--- Q3: At-Risk Students (<75% Attendance) ---")
for s in parsed_students:
    if s['attendance'] < 75:
        print(f"WARNING: {s['name']} has low attendance ({s['attendance']}%)")


--- Q3: At-Risk Students (<75% Attendance) ---


# Q4 & Q8: Averages & Performance Categories

In [20]:
print("\n--- Q4 & Q8: Student Averages & Categories ---")
for s in parsed_students:
    # Q4: Calculate Average
    marks = s['marks']
    avg_marks = sum(marks) / len(marks)
    s['average_score'] = avg_marks # Save this for later use
    
    # Q8: Categorize [cite: 30-34]
    category = ""
    if avg_marks >= 85:
        category = "Excellent"
    elif avg_marks >= 70:
        category = "Good"
    elif avg_marks >= 60:
        category = "Average"
    else:
        category = "Needs Improvement"
        
    print(f"{s['name']}: Avg {avg_marks:.1f} - {category}")


--- Q4 & Q8: Student Averages & Categories ---
Ali: Avg 80.0 - Good
Sara: Avg 60.0 - Average
Hamza: Avg 90.0 - Excellent
Maryam: Avg 55.0 - Needs Improvement
Usman: Avg 70.0 - Good


# Q5: Top Student by Department

In [21]:
print("\n--- Q5: Top Students ---")
top_students = {} # Key: Dept, Value: (Name, Score)

for s in parsed_students:
    dept = s['dept']
    avg = s['average_score']
    name = s['name']
    
    # If we haven't seen this dept yet, OR this student beats the current best
    if dept not in top_students or avg > top_students[dept][1]:
        top_students[dept] = (name, avg)

for dept, (name, score) in top_students.items():
    print(f"Top in {dept}: {name} ({score:.1f})")


--- Q5: Top Students ---
Top in CS: Hamza (90.0)
Top in IT: Usman (70.0)
Top in Math: Maryam (55.0)


# Q6: Unique Course Count

In [22]:
print("\n--- Q6: Unique Departments/Courses ---")
# A 'set' automatically removes duplicates
unique_courses = set([s['dept'] for s in parsed_students]) 
print(f"Total unique departments found: {len(unique_courses)} {unique_courses}")


--- Q6: Unique Departments/Courses ---
Total unique departments found: 3 {'Math', 'CS', 'IT'}


# Q7: Feedback Analysis

In [23]:
print("\n--- Q7: Feedback Analysis ---")
helpful_count = 0
all_comments = []

for s in parsed_students:
    comment = s['feedback'].lower() # Convert to lowercase [cite: 28]
    all_comments.append(comment)
    
    if "helpful" in comment:
        helpful_count += 1

print(f"Count of 'helpful': {helpful_count}")
print(f"All comments: {all_comments}")


--- Q7: Feedback Analysis ---
Count of 'helpful': 2
All comments: ['excellent', 'helpful', 'very good', 'average', 'helpful']


# Q9: Department Summary

In [24]:
print("\n--- Q9: Department Summary Report ---")
# We reuse data calculated in Q2 and Q5
for dept in unique_courses:
    count = len(dept_attendance[dept])
    avg_att = sum(dept_attendance[dept]) / count
    top_student = top_students[dept][0]
    
    print(f"Dept: {dept} | Students: {count} | Avg Att: {avg_att:.1f}% | Top: {top_student}")


--- Q9: Department Summary Report ---
Dept: Math | Students: 1 | Avg Att: 67.0% | Top: Maryam
Dept: CS | Students: 2 | Avg Att: 91.5% | Top: Hamza
Dept: IT | Students: 2 | Avg Att: 78.5% | Top: Usman


# Q10: Student-Course Matching

In [25]:
print("\n--- Q10: Student-Course Matching ---")
# Sample course list (created since one wasn't provided in data source)
available_courses = ["Python 101", "Data Science", "Calculus I", "Network Security"]

# We use 'zip' to pair them up. 
# If lists are uneven length, zip stops at the shortest one (handling the mismatch logic).
matched_pairs = list(zip([s['name'] for s in parsed_students], available_courses))

for student, course in matched_pairs:
    print(f"{student} is enrolled in {course}")

# Check for mismatch [cite: 39]
if len(parsed_students) != len(available_courses):
    diff = abs(len(parsed_students) - len(available_courses))
    print(f"Note: {diff} students/courses could not be matched due to shortage.")


--- Q10: Student-Course Matching ---
Ali is enrolled in Python 101
Sara is enrolled in Data Science
Hamza is enrolled in Calculus I
Maryam is enrolled in Network Security
Note: 1 students/courses could not be matched due to shortage.
