# NumPy Data Explorer Using a Student Dataset

This project explores a student performance dataset using NumPy. It demonstrates
array creation, indexing, slicing, mathematical and statistical operations,
reshaping, broadcasting, saving/loading arrays, and performance comparison with
Python lists.


In [22]:
import numpy as np
import time

# ---------------- FILE PATH ----------------
# CSV file read by load_student_data(). The name has no directory component,
# so it is resolved relative to the current working directory.
file_path = "student_data(project1).csv"


# ---------------- LOAD & CLEAN DATA USING NUMPY ----------------
def load_student_data():
    """Read the student CSV and return its mark columns as a float array.

    The file named by the module-level ``file_path`` is parsed with
    ``np.genfromtxt`` as comma-delimited text, skipping the first line as a
    header and reading every field as ``float``. Only the last six columns
    (treated as the mark columns) are kept, and ``np.nan_to_num`` is applied
    so that NaN entries become 0.

    Returns:
        The cleaned array holding the last six columns of every data row.
    """
    parsed = np.genfromtxt(file_path, delimiter=",", skip_header=1, dtype=float)

    # Keep the trailing six columns and replace NaN entries in one step.
    return np.nan_to_num(parsed[:, -6:])


# ---------------- INDEXING & SLICING ----------------
def indexing_and_slicing(data_frame):
    """Print basic indexing and slicing examples for the marks array.

    Shows the first record, the first column across all records, and the
    first three records.

    Args:
        data_frame: Two-dimensional marks array (one row per student).
    """
    print("\n--- Indexing & Slicing ---")

    # A single integer index selects one whole row.
    first_record = data_frame[0]
    print("First student record:", first_record)

    # A full slice on the rows with a fixed column selects one subject.
    first_subject = data_frame[:, 0]
    print("First subject marks of all students:", first_subject)

    # A row slice keeps every column of the leading rows.
    leading_rows = data_frame[:3]
    print("First 3 students:\n", leading_rows)


# ---------------- TOTAL MARKS ----------------
def total_marks_of_student(index, data_frame):
    """Return the sum of all marks in the record at position ``index``.

    Args:
        index: Position of the student's record in ``data_frame``.
        data_frame: Marks data indexed by student position.

    Returns:
        The result of ``np.sum`` over the selected record.
    """
    student_record = data_frame[index]
    return np.sum(student_record)


# ---------------- MAX PERFORMANCE ----------------
def max_performance(data_frame):
    """Print the student with the highest total marks.

    A student's total is the sum of every value in their record. All totals
    are computed in one vectorized reduction rather than a Python-level
    loop. When several students tie for the highest total, the first one is
    reported. Students are numbered from 1 in the output.

    Marks are assumed to contain no NaN values (``load_student_data``
    replaces them with 0).

    Args:
        data_frame: Array-like of marks with one record per student along
            the first axis.

    Returns:
        None. The result is written to standard output.
    """
    marks = np.asarray(data_frame)

    # Guard the empty case: with no records there is nothing to rank, and
    # indexing record 0 would raise IndexError.
    if marks.shape[0] == 0:
        print("\nTop performer: no student records available")
        return

    # Flatten each record so the per-student sum also works when the input
    # is not exactly two-dimensional.
    totals = marks.reshape(marks.shape[0], -1).sum(axis=1)

    # argmax returns the first occurrence of the maximum, which matches a
    # manual scan that only replaces the leader on a strictly greater total.
    best_student = int(np.argmax(totals))
    highest_marks = totals[best_student]

    print(f"\nTop performer: Student {best_student + 1} with {highest_marks} marks")


# ---------------- MIN PERFORMANCE ----------------
def min_performance(data_frame):
    """Print the student with the lowest total marks.

    A student's total is the sum of every value in their record. All totals
    are computed in one vectorized reduction rather than a Python-level
    loop. When several students tie for the lowest total, the first one is
    reported. Students are numbered from 1 in the output.

    Marks are assumed to contain no NaN values (``load_student_data``
    replaces them with 0).

    Args:
        data_frame: Array-like of marks with one record per student along
            the first axis.

    Returns:
        None. The result is written to standard output.
    """
    marks = np.asarray(data_frame)

    # Guard the empty case: with no records there is nothing to rank, and
    # indexing record 0 would raise IndexError.
    if marks.shape[0] == 0:
        print("Lowest performer: no student records available")
        return

    # Flatten each record so the per-student sum also works when the input
    # is not exactly two-dimensional.
    totals = marks.reshape(marks.shape[0], -1).sum(axis=1)

    # argmin returns the first occurrence of the minimum, which matches a
    # manual scan that only replaces the candidate on a strictly lower total.
    worst_student = int(np.argmin(totals))
    lowest_marks = totals[worst_student]

    print(f"Lowest performer: Student {worst_student + 1} with {lowest_marks} marks")


# ---------------- MEAN & STATISTICS ----------------
def mean_analysis(data_frame):
    """Print per-student averages followed by whole-dataset statistics.

    One line is printed per record with that record's mean, then the mean,
    maximum, minimum, standard deviation and variance taken over every value
    in ``data_frame``.

    Args:
        data_frame: Marks array with one record per student.
    """
    print("\n--- Mean & Statistical Analysis ---")

    # Students are numbered from 1 in the report.
    for student_number, record in enumerate(data_frame, start=1):
        print(f"Average marks of Student {student_number}: {np.mean(record)}")

    # Each statistic is computed over the flattened dataset, in report order.
    summary = (
        ("Overall class mean:", np.mean),
        ("Maximum marks:", np.max),
        ("Minimum marks:", np.min),
        ("Standard Deviation:", np.std),
        ("Variance:", np.var),
    )
    for label, statistic in summary:
        print(label, statistic(data_frame))


# ---------------- RESHAPING & BROADCASTING ----------------
def reshape_and_broadcast(data_frame):
    """Print a reshaping example and a broadcasting example.

    The reshaping example takes the first subject's marks for the first six
    students and arranges them as a 3x2 array. If that selection does not
    hold exactly six values (for example, fewer than six students), the
    reshape is skipped with a message instead of raising ``ValueError``, so
    the broadcasting example still runs.

    The broadcasting example adds a scalar bonus of 5 to every mark and
    prints the first three records of the result. ``data_frame`` itself is
    not modified.

    Args:
        data_frame: Two-dimensional marks array (one row per student).
    """
    print("\n--- Reshaping & Broadcasting ---")

    # A 3x2 layout needs exactly rows * cols values from the first subject.
    rows, cols = 3, 2
    needed = rows * cols
    subject_data = data_frame[:needed, 0]

    if subject_data.size == needed:
        reshaped = subject_data.reshape(rows, cols)
        print("Reshaped Data (one subject):\n", reshaped)
    else:
        print(
            "Reshaped Data (one subject): skipped, "
            f"need {needed} values but found {subject_data.size}"
        )

    # Broadcasting: the scalar is applied to every element, producing a new
    # array and leaving the input untouched.
    bonus_marks = data_frame + 5
    print("Marks after adding bonus:\n", bonus_marks[:3])


# ---------------- SAVE & LOAD ----------------
def save_and_load(data_frame):
    """Save the marks to ``student_marks.npy``, reload them and print a preview.

    The file name is relative, so the file is written to and read from the
    current working directory. The first three reloaded records are printed.

    Args:
        data_frame: Marks array to round-trip through the ``.npy`` file.
    """
    target = "student_marks.npy"
    np.save(target, data_frame)

    # Read the file back to demonstrate the round trip.
    restored = np.load(target)
    print("\nLoaded NumPy data:\n", restored[:3])


# ---------------- PERFORMANCE COMPARISON ----------------
def performance_comparison(data_frame):
    """Time adding 1 to every mark using nested lists versus a NumPy array.

    The marks are first converted to nested Python lists (outside the timed
    region). Each approach is then timed once and the elapsed seconds are
    printed. ``time.perf_counter`` is used because it is a monotonic,
    high-resolution clock intended for measuring short durations;
    ``time.time`` can be too coarse and may report 0.0 for work this small.

    Args:
        data_frame: Two-dimensional marks array (one row per student).
    """
    print("\n--- Performance Comparison ---")

    python_list = data_frame.tolist()

    # Pure-Python element-wise increment over the nested lists.
    start = time.perf_counter()
    _ = [[x + 1 for x in row] for row in python_list]
    list_elapsed = time.perf_counter() - start
    print("Python List Time:", list_elapsed)

    # Vectorized increment over the whole array.
    start = time.perf_counter()
    _ = data_frame + 1
    array_elapsed = time.perf_counter() - start
    print("NumPy Array Time:", array_elapsed)


# ---------------- MAIN FUNCTION ----------------
def main():
    """Load the student marks and run every demonstration step in order."""
    data_frame = load_student_data()
    print("Student Dataset Loaded:\n", data_frame)

    # Each step takes the marks array as its only argument; the order here
    # is the order of the printed report.
    demo_steps = (
        indexing_and_slicing,
        max_performance,
        min_performance,
        mean_analysis,
        reshape_and_broadcast,
        save_and_load,
        performance_comparison,
    )
    for step in demo_steps:
        step(data_frame)


# ---------------- RUN PROGRAM ----------------
if __name__ == "__main__":
    # Run the full demonstration only when this module is the entry point,
    # not when it is imported by other code.
    main()


Student Dataset Loaded:
 [[ 1.  3.  6.  5.  6.  6.]
 [ 1.  3.  4.  5.  5.  6.]
 [ 3.  3. 10.  7.  8. 10.]
 ...
 [ 3.  3.  3. 10.  8.  7.]
 [ 4.  5.  0. 11. 12. 10.]
 [ 3.  5.  5.  8.  9.  9.]]

--- Indexing & Slicing ---
First student record: [1. 3. 6. 5. 6. 6.]
First subject marks of all students: [1. 1. 3. 1. 2. 2. 1. 1. 1. 1. 2. 1. 3. 2. 1. 2. 2. 1. 4. 3. 1. 1. 3. 4.
 1. 3. 2. 4. 1. 5. 4. 1. 1. 1. 1. 1. 1. 1. 1. 1. 2. 4. 1. 1. 2. 1. 4. 1.
 2. 1. 3. 1. 4. 3. 4. 1. 1. 1. 1. 1. 3. 5. 1. 4. 4. 2. 5. 2. 3. 3. 1. 1.
 4. 2. 4. 3. 1. 3. 1. 2. 3. 2. 1. 3. 3. 3. 2. 3. 1. 5. 3. 3. 3. 1. 1. 1.
 1. 1. 2. 1. 5. 1. 1. 1. 1. 1. 1. 1. 5. 1. 1. 1. 1. 1. 1. 2. 1. 1. 4. 2.
 2. 2. 2. 4. 1. 2. 1. 1. 2. 5. 2. 2. 3. 4. 1. 1. 4. 1. 3. 1. 1. 2. 1. 3.
 2. 2. 1. 1. 1. 5. 5. 5. 3. 1. 1. 1. 3. 5. 2. 4. 2. 4. 4. 4. 5. 1. 4. 1.
 1. 1. 4. 1. 3. 1. 1. 4. 4. 4. 4. 1. 3. 2. 3. 3. 2. 3. 2. 2. 3. 5. 1. 1.
 5. 4. 1. 1. 2. 5. 3. 2. 5. 3. 3. 2. 1. 4. 2. 1. 4. 1. 2. 5. 1. 4. 2. 3.
 4. 4. 4. 1. 2. 1. 1. 5. 1. 1. 3. 3. 5. 2. 