# Data Loading and Preparation
Import pandas to read the purine.csv file and clean the data if necessary.

In [7]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Read CSV file
df = pd.read_csv('data/purine.csv')

# Sort by Total Purines in descending order
df_sorted = df.sort_values(by='Total Purines (mg uric acid/100 g)', ascending=False)

# Create figure with larger size
plt.figure(figsize=(15, 8))

# Create bar plot for top 20 foods
sns.barplot(data=df_sorted.head(20), 
            x='Food', 
            y='Total Purines (mg uric acid/100 g)',
            palette='viridis')

# Customize the plot
plt.xticks(rotation=45, ha='right')
plt.title('Top 20 Foods with Highest Purine Content', pad=20)
plt.xlabel('Food Items')
plt.ylabel('Total Purines (mg uric acid/100 g)')

# Adjust layout to prevent label cutoff
plt.tight_layout()

# Show plot
plt.show()

ParserError: Error tokenizing data. C error: Expected 7 fields in line 4, saw 9


# Basic Statistical Analysis
Calculate summary statistics of purine content across food categories.

In [None]:
# Calculate summary statistics for purine content
summary_stats = df['purine_content'].describe()

# Display the summary statistics
summary_stats

# Group the data by food category and calculate the mean purine content for each category
category_means = df.groupby('food_category')['purine_content'].mean().sort_values(ascending=False)

# Display the mean purine content for each category
category_means

# Sort and Rank Foods by Purine Content
Create a sorted DataFrame based on purine content from highest to lowest.

In [None]:
# Sort the DataFrame by purine content from highest to lowest
sorted_df = df.sort_values(by='purine_content', ascending=False)

# Display the sorted DataFrame
sorted_df.head()

# Plot the sorted purine content for visualization
import matplotlib.pyplot as plt

plt.figure(figsize=(12, 8))
plt.bar(sorted_df['food_name'], sorted_df['purine_content'], color='skyblue')
plt.xlabel('Food Name')
plt.ylabel('Purine Content')
plt.title('Foods Ranked by Purine Content')
plt.xticks(rotation=90)
plt.show()

# Create Bar Plot Visualization
Use matplotlib or seaborn to create a bar plot showing foods ranked by purine content.

In [None]:
# Plot the sorted purine content for visualization
import matplotlib.pyplot as plt
import seaborn as sns

# Set the style for the plot
sns.set(style="whitegrid")

# Create a bar plot using seaborn
plt.figure(figsize=(14, 10))
bar_plot = sns.barplot(x='purine_content', y='food_name', data=sorted_df, palette='viridis')

# Set the labels and title for the plot
bar_plot.set_xlabel('Purine Content')
bar_plot.set_ylabel('Food Name')
bar_plot.set_title('Foods Ranked by Purine Content')

# Display the plot
plt.show()

# Create Interactive Visualization
Use plotly to create an interactive bar chart with hover information showing detailed food information.

In [None]:
# Import plotly library for interactive visualization
import plotly.express as px

# Create an interactive bar chart with hover information
fig = px.bar(sorted_df, 
             x='food_name', 
             y='purine_content', 
             hover_data=['food_category', 'purine_content'], 
             labels={'purine_content': 'Purine Content', 'food_name': 'Food Name'},
             title='Interactive Visualization of Foods Ranked by Purine Content')

# Update layout for better visualization
fig.update_layout(xaxis_tickangle=-90, 
                  xaxis_title='Food Name', 
                  yaxis_title='Purine Content',
                  title={'x':0.5})

# Show the interactive plot
fig.show()