<a href="https://colab.research.google.com/github/Kunal-Ahirrao/Task_05_descriptive_llm/blob/main/Basic_Stats.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [5]:
import pandas as pd
import numpy as np
import os
import json

# Read both source tables (assumes the CSV files were uploaded to /content)
game_df = pd.read_csv('/content/game_results.csv')
team_df = pd.read_csv('/content/team_stats.csv')

# Preprocessing: split the 'Score' text on '-' once; the first piece becomes
# Goals_For, the second Goals_Against, and their difference Goal_Diff.
score_parts = game_df['Score'].str.split('-')
game_df['Goals_For'] = score_parts.str[0].astype(int)
game_df['Goals_Against'] = score_parts.str[1].astype(int)
game_df['Goal_Diff'] = game_df['Goals_For'].sub(game_df['Goals_Against'])

# Basic statistics
outcome = game_df['W/L']
goals_for = game_df['Goals_For']
goals_against = game_df['Goals_Against']

total_games = game_df.shape[0]
wins = outcome.eq('W').sum()
losses = outcome.eq('L').sum()
avg_goals_scored = goals_for.mean()
avg_goals_conceded = goals_against.mean()
total_goals_scored = goals_for.sum()
total_goals_conceded = goals_against.sum()

# Row holding the highest 'Attendance' value, as a {column: value} dict.
peak_attendance_label = game_df['Attendance'].idxmax()
most_attended_game = game_df.loc[peak_attendance_label].to_dict()

# Win/Loss extremes
def _extreme_game(games, *, use_abs, pick_largest):
    """Return the row of *games* with the extreme 'Goal_Diff' as a dict.

    Args:
        games: DataFrame with a numeric 'Goal_Diff' column.
        use_abs: Rank by the absolute goal difference instead of the signed one.
        pick_largest: Select the maximum when True, the minimum when False.

    Returns:
        The selected row as a ``{column: value}`` dict, or None when *games*
        has no rows (e.g. a season without a single loss). Ties resolve to
        the earliest row, so the result is deterministic.
    """
    if games.empty:
        return None
    margin = games['Goal_Diff'].to_numpy()
    if use_abs:
        margin = np.abs(margin)
    # argmax/argmin return the first occurrence of the extreme value.
    position = margin.argmax() if pick_largest else margin.argmin()
    return games.iloc[[position]].to_dict('records')[0]


# Filter once per outcome instead of once per statistic.
wins_df = game_df[game_df['W/L'] == 'W']
losses_df = game_df[game_df['W/L'] == 'L']

closest_win = _extreme_game(wins_df, use_abs=True, pick_largest=False)
closest_loss = _extreme_game(losses_df, use_abs=True, pick_largest=False)
biggest_win = _extreme_game(wins_df, use_abs=False, pick_largest=True)
# The heaviest defeat is taken as the lowest (most negative) Goal_Diff.
biggest_loss = _extreme_game(losses_df, use_abs=False, pick_largest=False)

# Team stats breakdown: index by 'Metric', transpose so each metric becomes a
# column, and keep the first resulting row as a {metric: value} dict.
metrics_as_columns = team_df.set_index('Metric').T
team_totals = metrics_as_columns.to_dict('records')[0]

# Summary object, filled in the order the keys should appear.
summary_stats = {}

# Season-level counts and goal figures (averages rounded to 2 decimals).
summary_stats["Total Games"] = total_games
summary_stats["Wins"] = wins
summary_stats["Losses"] = losses
summary_stats["Average Goals Scored"] = round(avg_goals_scored, 2)
summary_stats["Average Goals Conceded"] = round(avg_goals_conceded, 2)
summary_stats["Total Goals Scored"] = total_goals_scored
summary_stats["Total Goals Conceded"] = total_goals_conceded

# Individual notable games.
summary_stats["Most Attended Game"] = most_attended_game
summary_stats["Closest Win"] = closest_win
summary_stats["Closest Loss"] = closest_loss
summary_stats["Biggest Win"] = biggest_win
summary_stats["Biggest Loss"] = biggest_loss

# Per-metric team figures.
summary_stats["Team Totals"] = team_totals

# JSON fix for NumPy types
def convert_np(obj):
    """Recursively replace NumPy values in *obj* with built-in Python ones.

    The ``json`` module cannot serialize NumPy scalars or arrays, so a
    structure is passed through this function before being dumped.

    Args:
        obj: Any value. Dicts (keys and values), lists and tuples are walked
            recursively; NumPy integer, floating and boolean scalars are
            unwrapped with ``.item()``; NumPy arrays become (nested) lists.

    Returns:
        A new structure of the same shape built from plain Python objects.
        Values of any other type are returned unchanged.
    """
    # np.bool_ is not a subclass of np.integer, so it must be listed explicitly.
    if isinstance(obj, (np.integer, np.floating, np.bool_)):
        return obj.item()
    if isinstance(obj, np.ndarray):
        # tolist() yields built-in scalars for numeric dtypes; recurse so that
        # object-dtype arrays holding NumPy scalars are covered as well.
        return convert_np(obj.tolist())
    if isinstance(obj, dict):
        return {convert_np(k): convert_np(v) for k, v in obj.items()}
    if isinstance(obj, list):
        return [convert_np(i) for i in obj]
    if isinstance(obj, tuple):
        return tuple(convert_np(i) for i in obj)
    return obj

# Save results: make sure the output folder exists, then write the summary
# (with NumPy values converted by convert_np) as indented JSON.
output_dir = 'lacrosse_output'
os.makedirs(output_dir, exist_ok=True)

summary_json_path = os.path.join(output_dir, 'lacrosse_summary_stats.json')
with open(summary_json_path, 'w') as json_file:
    json.dump(convert_np(summary_stats), json_file, indent=4)

print("✅ Summary stats saved successfully.")

# Save detailed summaries to text file
def _closest_games(games, count=5):
    """Return up to *count* rows of *games* with the smallest |Goal_Diff|.

    Rows are ordered from the narrowest margin upward; rows with equal
    margins keep their original relative order because the sort is stable.
    """
    order = games['Goal_Diff'].abs().to_numpy().argsort(kind='stable')
    return games.iloc[order[:count]]


# Filter once per outcome and reuse the result for every section.
won_games = game_df[game_df['W/L'] == 'W']
lost_games = game_df[game_df['W/L'] == 'L']

# mergesort is a stable sort, so tied rows keep their original order and the
# report is reproducible from run to run.
top_scoring_games = game_df.sort_values(by='Goals_For', ascending=False, kind='mergesort').head(5)
top_attendance_games = game_df.sort_values(by='Attendance', ascending=False, kind='mergesort').head(5)
# Wins and losses are both ranked by absolute margin so the two "closest"
# sections share one definition of closeness.
closest_wins = _closest_games(won_games)
closest_losses = _closest_games(lost_games)
biggest_wins = won_games.sort_values(by='Goal_Diff', ascending=False, kind='mergesort').head(5)

# (heading, table) pairs in the order they appear in the report.
report_sections = [
    ("Top 5 Highest Scoring Games:", top_scoring_games[['Date', 'Opponent', 'Score', 'Goals_For']]),
    ("Top 5 Most Attended Games:", top_attendance_games[['Date', 'Opponent', 'Attendance']]),
    ("Top 5 Closest Wins:", closest_wins[['Date', 'Opponent', 'Score', 'Goal_Diff']]),
    ("Top 5 Closest Losses:", closest_losses[['Date', 'Opponent', 'Score', 'Goal_Diff']]),
    ("Top 5 Biggest Wins:", biggest_wins[['Date', 'Opponent', 'Score', 'Goal_Diff']]),
]

# Explicit UTF-8 so non-ASCII text in the tables (e.g. opponent names) cannot
# fail under a platform whose default encoding is not UTF-8.
with open(os.path.join(output_dir, 'lacrosse_top_games.txt'), 'w', encoding='utf-8') as f:
    # Each section is its heading, a newline, then the table; sections are
    # separated by one blank line.
    f.write("\n\n".join(
        f"{heading}\n{table.to_string(index=False)}"
        for heading, table in report_sections
    ))

print("✅ Text summary file saved as lacrosse_top_games.txt.")



✅ Summary stats saved successfully.
✅ Text summary file saved as lacrosse_top_games.txt.
