In [37]:
import seaborn as sns
import pandas as pd
import matplotlib.pyplot as plt
import io
import base64

In [38]:
def create_bar_plot(df: pd.DataFrame, categorical_column: str, overall_avg: pd.Series, response_dict: dict):
    """Render grouped bars of the mean math/reading/writing score per category.

    For every distinct value of ``categorical_column`` three bars are drawn
    (math, reading, writing). The overall average of each subject is overlaid
    as a marker at the position of the matching bar.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain ``categorical_column`` plus the columns 'math score',
        'reading score' and 'writing score'.
    categorical_column : str
        Column whose values define the groups on the x axis.
    overall_avg : pd.Series
        Looked up by 'math score', 'reading score' and 'writing score'.
    response_dict : dict
        Not used by this function; kept so existing callers keep working.

    Returns
    -------
    str
        The figure as a base64-encoded PNG (UTF-8 text).
    """
    score_columns = ['math score', 'reading score', 'writing score']

    # A single groupby is the source of truth for bar order AND tick labels.
    # groupby sorts its keys, whereas Series.unique() returns values in order
    # of first appearance, so labelling the ticks from unique() can put the
    # wrong category name under a bar (and the two can even differ in length
    # when the column contains missing values).
    means = df.groupby(categorical_column)[score_columns].mean()
    categories = list(means.index)

    positions = list(range(len(categories)))
    width = 0.25
    left = [p - width for p in positions]
    right = [p + width for p in positions]
    count = len(positions)

    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        # Per-category means, one bar per subject
        ax.bar(left, means['math score'], width, label='Math')
        ax.bar(positions, means['reading score'], width, label='Reading')
        ax.bar(right, means['writing score'], width, label='Writing')

        # Overall averages, repeated at every category for visual comparison
        ax.plot(left, [overall_avg['math score']] * count, 'ro', label='Overall Math')
        ax.plot(positions, [overall_avg['reading score']] * count, 'go', label='Overall Reading')
        ax.plot(right, [overall_avg['writing score']] * count, 'bo', label='Overall Writing')

        ax.set_title(f'Average Scores by {categorical_column}')
        ax.set_xticks(positions)
        ax.set_xticklabels(categories, rotation=45)
        ax.legend()
        fig.tight_layout()

        buffer = io.BytesIO()
        fig.savefig(buffer, format='png')
    finally:
        # Close this specific figure even if drawing/saving failed, so figures
        # do not pile up in pyplot's registry across calls.
        plt.close(fig)

    return base64.b64encode(buffer.getvalue()).decode('utf-8')

# response_dict = {}

# # Calculate overall average scores
# score_columns = ['math score', 'reading score', 'writing score']
# categorical_columns = ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course']
# overall_avg = df[score_columns].mean()

# create_bar_plot(df, 'gender', overall_avg, response_dict)


In [39]:
def process_overall_avg(overall_avg: pd.Series, response_dict: dict):
    """Chart the overall per-subject averages and package them for the response.

    Parameters
    ----------
    overall_avg : pd.Series
        Average score per subject; plotted as a bar chart and also returned
        rounded to two decimals.
    response_dict : dict
        Not used by this function.

    Returns
    -------
    dict
        ``{'data': <rounded averages as a dict>, 'image': <base64 PNG string>}``
    """
    # Draw the chart on a fresh pyplot figure
    plt.figure(figsize=(10, 6))
    overall_avg.plot(kind='bar')
    plt.title('Overall Average Scores by Subject')
    plt.ylabel('Average Score')
    plt.xticks(rotation=0)
    plt.tight_layout()

    # Render to an in-memory PNG and base64-encode it as text
    with io.BytesIO() as png_buffer:
        plt.savefig(png_buffer, format='png')
        encoded_png = base64.b64encode(png_buffer.getvalue()).decode('utf-8')

    rounded_averages = overall_avg.round(2).to_dict()
    plt.close()
    return {'data': rounded_averages, 'image': encoded_png}

def process_file(df: pd.DataFrame):
    """Build the full response: overall averages plus a per-category breakdown.

    Parameters
    ----------
    df : pd.DataFrame
        Must contain the three score columns ('math score', 'reading score',
        'writing score') and the five categorical columns listed below.

    Returns
    -------
    dict
        ``'overall'`` holds the result of ``process_overall_avg``;
        ``'columns'`` is a list with one single-key dict per categorical
        column, mapping the column name to ``{'data': ..., 'image': ...}``
        where ``data`` is the grouped means rounded to two decimals and
        ``image`` is the value returned by ``create_bar_plot``.
    """
    subjects = ['math score', 'reading score', 'writing score']
    groupings = ['gender', 'race/ethnicity', 'parental level of education', 'lunch', 'test preparation course']
    subject_means = df[subjects].mean()

    payload = {}
    payload['overall'] = process_overall_avg(subject_means, payload)

    # The list is attached to the payload first and then filled in place.
    per_column = payload['columns'] = []
    for grouping in groupings:
        grouped_means = df.groupby(grouping)[subjects].mean().round(2).to_dict()
        chart = create_bar_plot(df, grouping, subject_means, payload)
        per_column.append({grouping: {'data': grouped_means, 'image': chart}})

    return payload


In [40]:
# Input CSV; a relative path, so it is resolved against the kernel's current
# working directory.
file_path = "StudentsPerformance.csv"

# Load the data and build the response. process_file reads the three score
# columns and five categorical columns by name, so the CSV must provide them.
df = pd.read_csv(file_path)
response_dict = process_file(df)

In [41]:
# Dump the whole response. NOTE: every 'image' value is a base64-encoded PNG
# string, so this output is very long.
print(response_dict)

{'overall': {'data': {'math score': 66.09, 'reading score': 69.17, 'writing score': 68.05}, 'image': 'iVBORw0KGgoAAAANSUhEUgAAA+gAAAJYCAYAAADxHswlAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAARK1JREFUeJzt3QecFPX9P/4PUlUEBJWiYI9gSxQVsRcisRuJ3VhDjCFGJWrkm0RjiSRGRU3sBWOLJZaISVRCjC1oFDWWKGIFpRgLICigsv/H+/N77P3vjiJl4Ya95/PxGLmd3Z35zNztuK/5tCalUqmUAAAAgAa1XMPuHgAAAAgCOgAAABSAgA4AAAAFIKADAABAAQjoAAAAUAACOgAAABSAgA4AAAAFIKADAABAAQjoAAAAUAACOgAAABSAgA4AAAAFIKADAABAAQjoAAAAUAACOgAAABSAgA7AMuef//xnatKkSf637KijjkprrbVWg5aLZVf87ey1115pWf0s/OlPf/rK1/qMABSfgA7QCL388svp8MMPT6uvvnpq2bJl6tKlSzrssMPy+sZkq622yuHmiiuuaOiiFM7bb7+djj766LTuuuumVq1apU6dOqUddtghnXnmmQ1dtGXa//73v3TiiSem7t27p+WXXz6tttpq+e/wpz/9aZo2bVpalt16663p4osvbuhiACzTmjV0AQBYuu6+++50yCGHpPbt26djjz02rb322jmMXXfddbkW7rbbbkvf/va3U7UbM2ZMevrpp3ON4i233JKOP/74hi5SYbz++utpyy23zAHymGOOyedowoQJ6dlnn02/+c1v0llnndXQRVwmffTRR2mLLbZIU6dOzec1QvqHH36YXnjhhXyTKP4GW7duvcT2f80116TZs2

In [42]:
# Print one entry per categorical column. Each entry is a single-key dict
# {column name: {'data': ..., 'image': ...}}; 'image' is a long base64 string.
for item in response_dict['columns']:
    print(item)


{'gender': {'data': {'math score': {'female': 63.63, 'male': 68.73}, 'reading score': {'female': 72.61, 'male': 65.47}, 'writing score': {'female': 72.47, 'male': 63.31}}, 'image': 'iVBORw0KGgoAAAANSUhEUgAABLAAAAJYCAYAAABy5h8aAAAAOnRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjEwLjEsIGh0dHBzOi8vbWF0cGxvdGxpYi5vcmcvc2/+5QAAAAlwSFlzAAAPYQAAD2EBqD+naQAAaKtJREFUeJzt3QeYFdXZB/CXDqIgoHQQrIBiQwVRFBXFjoIllohKYi9ojEpibyA2NKLGHhMr2Es0iiX2gg17o4pAFCkinf2eM3y7soCIsrDD7u/3PPfbOzPnzpy5ez9z+e8576lQUFBQEAAAAACQUxVLuwMAAAAAsCQCLAAAAAByTYAFAAAAQK4JsAAAAADINQEWAAAAALkmwAIAAAAg1wRYAAAAAOSaAAsAAACAXBNgAQAAAJBrAiwAAAAAck2ABQAAAECuCbAAAAAAyDUBFgAAAAC5JsACAAAAINcEWAAA5ch5550XFSpUiG+//TbKm8MPPzxatGhR2t0AAH4DARYAlAPXXXddFlq0b9++tLuSO7NmzYqrr746Nttss6hVq1asvvrqseGGG8ZRRx0Vn3zySWl3DwCAiKhc2h0AAJa/O++8Mxt58sYbb8QXX3wR6667bml3KTd69OgR//73v+Oggw6KP/7xjzF79uwsuHrssceiY8eO0apVq9LuIgBAuSfAAoAybvjw4fHKK6/EAw88EEcffXQWZp177rkrtA/z5s3LRjpVr1498uTNN9/MgqqLL744/vKXvxQ7du2118akSZNWWF9mzJgRVatWjYoVDZBfGRQUFGS/sxo1apR2VwCgXPANCQ