In [1]:
# import requests

import requests
import pandas as pd

# Part 1: Data Ingestion
current_quiz_url = 'https://www.jsonkeeper.com/b/LLQT'
current_quiz_data = requests.get(current_quiz_url).json()

historical_quiz_url = 'https://api.jsonserve.com/XgAgFJ'
historical_quiz_data = requests.get(historical_quiz_url).json()

# Part 2: Data Preparation
current_df = pd.DataFrame(current_quiz_data)
historical_df = pd.DataFrame(historical_quiz_data)

# Clean 'accuracy' column: remove percentage symbols and convert to float
historical_df['accuracy'] = historical_df['accuracy'].str.replace('%', '')

# Handle potential conversion errors
historical_df['accuracy'] = pd.to_numeric(historical_df['accuracy'], errors='coerce')

# Fill NaN values with 0 (or any other appropriate value)
historical_df['accuracy'].fillna(0, inplace=True)

# Ensure 'incorrect_answers' is numeric
historical_df['incorrect_answers'] = pd.to_numeric(historical_df['incorrect_answers'], errors='coerce')
historical_df['incorrect_answers'].fillna(0, inplace=True)

# Check schema of historical_df
print(historical_df.columns)

# Part 3: Analyze Performance
accuracy_performance = historical_df.groupby('user_id').agg({'accuracy': 'mean'})
incorrect_answers_performance = historical_df.groupby('user_id').agg({'incorrect_answers': 'sum'})

# Part 4: Generate Insights
def generate_insights(user_id, historical_df):
    user_data = historical_df[historical_df['user_id'] == user_id]
    
    # Calculate average performance by accuracy
    avg_accuracy = user_data['accuracy'].mean()
    
    # Identify weak areas (questions with highest incorrect answers)
    weak_areas = user_data[user_data['incorrect_answers'] > user_data['incorrect_answers'].mean()]

    print(f"Avg Accuracy: {avg_accuracy}")
    print("Weak Areas:")
    print(weak_areas)
    
    return avg_accuracy, weak_areas

avg_accuracy, weak_areas = generate_insights('user_id', historical_df)

# Part 5: Create Recommendations
def create_recommendations(avg_accuracy, weak_areas):
    recommendations = []
    
    # Recommend focusing on improving overall accuracy
    if avg_accuracy < 0.5:
        recommendations.append("Focus on improving overall accuracy.")
    
    # Recommend addressing weak areas
    for _, row in weak_areas.iterrows():
        recommendations.append(f"Review question ID: {row['id']} to improve understanding.")
    
    return recommendations

recommendations = create_recommendations(avg_accuracy, weak_areas)
print("Recommendations:")
print(recommendations)






Index(['id', 'quiz_id', 'user_id', 'submitted_at', 'created_at', 'updated_at',
       'score', 'trophy_level', 'accuracy', 'speed', 'final_score',
       'negative_score', 'correct_answers', 'incorrect_answers', 'source',
       'type', 'started_at', 'ended_at', 'duration', 'better_than',
       'total_questions', 'rank_text', 'mistakes_corrected',
       'initial_mistake_count', 'response_map', 'quiz'],
      dtype='object')
Avg Accuracy: nan
Weak Areas:
Empty DataFrame
Columns: [id, quiz_id, user_id, submitted_at, created_at, updated_at, score, trophy_level, accuracy, speed, final_score, negative_score, correct_answers, incorrect_answers, source, type, started_at, ended_at, duration, better_than, total_questions, rank_text, mistakes_corrected, initial_mistake_count, response_map, quiz]
Index: []

[0 rows x 26 columns]
Recommendations:
[]
