In [2]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Load cleaned data: read the CSV and convert the visit_date column from text
# to datetime values in one chained expression.
# NOTE(review): the path is absolute and machine-specific; consider a
# configurable, relative data directory.
data = pd.read_csv(
    'C:/Users/mihir/OneDrive/Desktop/Assignment9/ms_data.csv'
).assign(visit_date=lambda frame: pd.to_datetime(frame['visit_date']))

# Scatter plot of age vs walking speed with regression line.
# sns.lmplot is a figure-level function: it always creates its own figure, so
# calling plt.figure() beforehand only leaves an extra, empty figure in the
# output and its figsize is ignored. The size is set through height/aspect
# instead (height=6 in, width = height * aspect = 10 in).
reg_grid = sns.lmplot(x='age', y='walking_speed', data=data, height=6, aspect=10 / 6)
reg_grid.set_axis_labels('Age', 'Walking Speed')
# With no row/col faceting the grid holds a single Axes, exposed as `.ax`.
reg_grid.ax.set_title('Age vs Walking Speed with Regression Line')
# Re-run the layout so the title added above is not clipped.
reg_grid.tight_layout()
plt.show()

# Box plots by education level: one box of walking_speed per education_level,
# drawn on an explicitly created 10x6 inch Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=data, x='education_level', y='walking_speed', ax=ax)
ax.set_title('Walking Speed by Education Level')
ax.set_xlabel('Education Level')
ax.set_ylabel('Walking Speed')
plt.show()

# Line plot showing education age interaction: walking_speed against age,
# with one coloured line per education_level.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=data,
    x='age',
    y='walking_speed',
    hue='education_level',
    ax=ax,
)
ax.set(
    title='Education Age Interaction on Walking Speed',
    xlabel='Age',
    ylabel='Walking Speed',
)
plt.show()

# Bar plot of mean costs by insurance type.
# Bar height is the mean visit_cost per insurance_type; the error bars show
# one standard deviation.
# `ci='sd'` is deprecated since seaborn 0.12 (it emits a FutureWarning and is
# scheduled for removal); `errorbar='sd'` is the supported spelling.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(x='insurance_type', y='visit_cost', data=data, errorbar='sd', ax=ax)
ax.set_title('Mean Costs by Insurance Type')
ax.set_xlabel('Insurance Type')
ax.set_ylabel('Visit Cost')
plt.show()

# Box plots showing cost distributions: one box of visit_cost per
# insurance_type on a 10x6 inch Axes.
fig, ax = plt.subplots(figsize=(10, 6))
sns.boxplot(data=data, x='insurance_type', y='visit_cost', ax=ax)
ax.set(
    title='Cost Distributions by Insurance Type',
    xlabel='Insurance Type',
    ylabel='Visit Cost',
)
plt.show()

# Pair plot of key variables (age, walking_speed, visit_cost), coloured by
# education_level.
# sns.pairplot is figure-level and creates its own grid of subplots, so no
# plt.figure() call is made here (it would only add an empty figure).
pair_grid = sns.pairplot(
    data,
    vars=['age', 'walking_speed', 'visit_cost'],
    hue='education_level',
)
# plt.title() would label only a single subplot of the grid; a figure-level
# suptitle labels the whole plot. y > 1 places it just above the top row.
pair_grid.figure.suptitle('Pair Plot of Key Variables', y=1.02)
plt.show()

# Faceted plots by education/insurance: a grid with one row per
# insurance_type and one column per education_level; every panel shows
# age against walking_speed as a scatter plot.
g = sns.FacetGrid(
    data,
    row='insurance_type',
    col='education_level',
    margin_titles=True,  # put row labels on the right margin of the grid
)
g.map(sns.scatterplot, 'age', 'walking_speed')
g.add_legend()
plt.show()

# Time trends where relevant: walking_speed per calendar year, one line per
# education_level.
# Derive the calendar year of each visit; this adds a 'year' column to `data`.
data['year'] = data['visit_date'].dt.year

fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(
    data=data,
    x='year',
    y='walking_speed',
    hue='education_level',
    ax=ax,
)
ax.set_title('Time Trends of Walking Speed by Education Level')
ax.set_xlabel('Year')
ax.set_ylabel('Walking Speed')
plt.show()



In [None]:
# Using Interactive Plots
# Reload the cleaned CSV; this rebinds `data` to a freshly read frame with
# visit_date converted to datetime values.
# NOTE(review): the path is absolute and machine-specific; consider a
# configurable, relative data directory.
csv_path = 'C:/Users/mihir/OneDrive/Desktop/Assignment9/ms_data.csv'
data = pd.read_csv(csv_path)
data = data.assign(visit_date=pd.to_datetime(data['visit_date']))

# Scatter plot of age vs walking speed with regression line.
# trendline='ols' asks plotly express to overlay an ordinary-least-squares fit
# (plotly delegates the fit to statsmodels, which must be installed).
fig = px.scatter(
    data,
    x='age',
    y='walking_speed',
    trendline='ols',
    title='Age vs Walking Speed with Regression Line',
)
fig.update_xaxes(title_text='Age')
fig.update_yaxes(title_text='Walking Speed')
fig.show()

# Box plots by education level: interactive box of walking_speed for each
# education_level.
fig = px.box(
    data,
    x='education_level',
    y='walking_speed',
    title='Walking Speed by Education Level',
)
fig.update_xaxes(title_text='Education Level')
fig.update_yaxes(title_text='Walking Speed')
fig.show()

# Line plot showing education age interaction.
# px.line joins points in the order the rows appear, so plotting the raw
# visit records produces a zig-zag tangle. Average walking_speed per
# (education_level, age) and sort by age so each education level is drawn as
# a single left-to-right line of mean walking speed.
age_speed = (
    data.groupby(['education_level', 'age'], as_index=False)['walking_speed']
    .mean()
    .sort_values(['education_level', 'age'])
)
fig = px.line(
    age_speed,
    x='age',
    y='walking_speed',
    color='education_level',
    title='Education Age Interaction on Walking Speed',
)
fig.update_layout(xaxis_title='Age', yaxis_title='Walking Speed')
fig.show()

# Bar plot of mean costs by insurance type.
# px.bar draws one bar segment per input row and stacks them, so feeding it
# the raw visits shows the SUM of costs, and error_y='visit_cost' would use
# each cost as its own error. Aggregate first: one row per insurance type
# holding the mean cost and its standard deviation (used for the error bars).
cost_summary = (
    data.groupby('insurance_type')['visit_cost']
    .agg(mean_cost='mean', cost_sd='std')
    .reset_index()
)
fig = px.bar(
    cost_summary,
    x='insurance_type',
    y='mean_cost',
    error_y='cost_sd',
    title='Mean Costs by Insurance Type',
)
fig.update_layout(xaxis_title='Insurance Type', yaxis_title='Visit Cost')
fig.show()

# Box plots showing cost distributions: interactive box of visit_cost for
# each insurance_type.
fig = px.box(
    data,
    x='insurance_type',
    y='visit_cost',
    title='Cost Distributions by Insurance Type',
)
fig.update_xaxes(title_text='Insurance Type')
fig.update_yaxes(title_text='Visit Cost')
fig.show()

# Pair plot of key variables (age, walking_speed, visit_cost), coloured by
# education_level.
fig = px.scatter_matrix(
    data,
    dimensions=['age', 'walking_speed', 'visit_cost'],
    color='education_level',
    title='Pair Plot of Key Variables',
)
# `diagonal_visible` is an attribute of the splom traces, not of the layout;
# passing it to update_layout raises an invalid-property error. Hide the
# diagonal panels (each variable plotted against itself) on the traces.
fig.update_traces(diagonal_visible=False)
fig.show()

# Faceted plots by education/insurance: one column of panels per
# education_level and one row per insurance_type, each showing age against
# walking_speed.
# update_layout(xaxis_title=..., yaxis_title=...) only relabels the first
# subplot's axes (`xaxis` / `yaxis`); the other facets keep the raw column
# names. The `labels` mapping renames the columns for every facet's axis
# titles (and for the hover text).
fig = px.scatter(
    data,
    x='age',
    y='walking_speed',
    facet_col='education_level',
    facet_row='insurance_type',
    labels={'age': 'Age', 'walking_speed': 'Walking Speed'},
    title='Faceted Plots by Education/Insurance',
)
fig.show()

# Time trends where relevant: mean walking speed per calendar year, one line
# per education_level.
# Derive the calendar year of each visit; this adds a 'year' column to `data`.
data['year'] = data['visit_date'].dt.year
# px.line joins points in row order, so the raw visits (many rows per year)
# would be drawn as a scribble. Reduce to one mean value per
# (education_level, year) and sort by year before plotting.
yearly_speed = (
    data.groupby(['education_level', 'year'], as_index=False)['walking_speed']
    .mean()
    .sort_values(['education_level', 'year'])
)
fig = px.line(
    yearly_speed,
    x='year',
    y='walking_speed',
    color='education_level',
    title='Time Trends of Walking Speed by Education Level',
)
fig.update_layout(xaxis_title='Year', yaxis_title='Walking Speed')
# Years are whole numbers; put a tick on every year rather than fractions.
fig.update_xaxes(dtick=1)
fig.show()