## 1. Load the dataset into a NumPy array

In [10]:
import numpy as np

In [11]:
# Read the CSV into a structured array: the header row supplies the field
# names (names=True) and each column's dtype is inferred (dtype=None).
data = np.genfromtxt(
    'student_data_4.csv',
    names=True,
    dtype=None,
    delimiter=',',
    encoding=None,
)

# Sanity check: show the first 15 records to confirm the file parsed.
first_rows = data[:15]
print(first_rows)


[( 1, 'Male', 28,  3,  82, 'Pass') ( 2, 'Female', 27,  2,  86, 'Pass')
 ( 3, 'Male', 35,  6,  62, 'Pass') ( 4, 'Female', 23,  6,  50, 'Fail')
 ( 5, 'Male', 37,  7,  50, 'Fail') ( 6, 'Female', 31,  5,  69, 'Pass')
 ( 7, 'Female', 29,  8,  51, 'Fail') ( 8, 'Female', 25,  7,  86, 'Pass')
 ( 9, 'Male', 30,  2,  46, 'Fail') (10, 'Female', 35,  1,  82, 'Pass')
 (11, 'Male', 34,  9,  56, 'Fail') (12, 'Male', 24,  5,  83, 'Pass')
 (13, 'Male', 22, 11,  86, 'Pass') (14, 'Male', 32,  5,  35, 'Fail')
 (15, 'Male', 21, 10, 100, 'Pass')]


## 2. Calculate statistics for the student data

### Average test score

In [12]:
# Pull the TestScore field out of the structured array; later cells reuse it.
test_scores = data['TestScore']

# Arithmetic mean over every student's test score.
average_test_score = test_scores.mean()
print(f"Average Test Score: {average_test_score}")


Average Test Score: 67.432


### Minimum and maximum test scores

In [13]:
# Lowest and highest values found in test_scores; both are reused by the
# cell that counts how many students hit each extreme.
min_test_score, max_test_score = test_scores.min(), test_scores.max()

print(f"Minimum Test Score: {min_test_score}")
print(f"Maximum Test Score: {max_test_score}")


Minimum Test Score: 35
Maximum Test Score: 100


### Standard deviation of study hours

In [14]:
# StudyHours field, cast to int; later cells reuse this array.
study_hours = data['StudyHours'].astype(int)

# Spread of study hours. NumPy's default is the population standard
# deviation (ddof=0), which is what is reported here.
std_dev_study_hours = study_hours.std()
print(f"Standard Deviation of Study Hours: {std_dev_study_hours}")


Standard Deviation of Study Hours: 3.11227103575508


### Number of students who got the highest and lowest scores

In [15]:
# Boolean masks marking the students whose score equals each extreme.
at_max = test_scores == max_test_score
at_min = test_scores == min_test_score

# Summing a boolean mask counts its True entries.
num_highest_scores = at_max.sum()
num_lowest_scores = at_min.sum()

print(f"Number of Students with the Highest Score: {num_highest_scores}")
print(f"Number of Students with the Lowest Score: {num_lowest_scores}")


Number of Students with the Highest Score: 17
Number of Students with the Lowest Score: 8


### Youngest and oldest students, age and average test score

In [16]:
# Age field, cast to int.
ages = data['Age'].astype(int)

# Extremes of the age range.
youngest_age, oldest_age = ages.min(), ages.max()
print(f"Youngest Age: {youngest_age}")
print(f"Oldest Age: {oldest_age}")

# Several students can share the minimum or maximum age, so select every
# student at each extreme and average their test scores.
is_youngest = ages == youngest_age
is_oldest = ages == oldest_age
avg_test_score_youngest = test_scores[is_youngest].mean()
avg_test_score_oldest = test_scores[is_oldest].mean()
print(f"Average Test Score of Youngest Students: {avg_test_score_youngest}")
print(f"Average Test Score of Oldest Students: {avg_test_score_oldest}")


Youngest Age: 21
Oldest Age: 38
Average Test Score of Youngest Students: 63.94117647058823
Average Test Score of Oldest Students: 62.604651162790695


### Average study hours, filtering on gender

In [17]:
# Gender field; values are compared against the labels 'Male' and 'Female'.
genders = data['Gender']

# Row masks for each gender label.
is_male = genders == 'Male'
is_female = genders == 'Female'

# Mean study hours within each group.
average_study_hours_male = study_hours[is_male].mean()
average_study_hours_female = study_hours[is_female].mean()

print(f"Average Study Hours (Male): {average_study_hours_male}")
print(f"Average Study Hours (Female): {average_study_hours_female}")


Average Study Hours (Male): 5.686390532544379
Average Study Hours (Female): 6.093306288032454


### Percentage of pass/fail

In [18]:
# Grade field; values are compared against the labels 'Pass' and 'Fail'.
grades = data['Grade']
total_students = len(grades)

# Count each outcome by summing the matching boolean mask.
num_pass = (grades == 'Pass').sum()
num_fail = (grades == 'Fail').sum()

# Share of all students in each outcome, expressed as a percentage.
percentage_pass = num_pass / total_students * 100
percentage_fail = num_fail / total_students * 100

print(f"Percentage of Pass: {percentage_pass}")
print(f"Percentage of Fail: {percentage_fail}")


Percentage of Pass: 61.5
Percentage of Fail: 38.5
