In [None]:
import numpy as np
import matplotlib.pyplot as plt
# Synthetic log-normal demo sample (10 ** N(0, 1)), drawn from a seeded
# generator so the figure is reproducible. It is kept under its own name
# rather than `camp_data`, because other cells index `camp_data` by column
# (e.g. 'Zip', 'Cost') and would break if that name were rebound to an array.
demo_rng = np.random.default_rng(42)
lognormal_sample = 10 ** demo_rng.normal(size=500)

# Pick the bin edges in log10 space from the very values that are plotted,
# then map them back with 10 ** edges. Deriving the edges from `sample + 1`
# would place every edge above 1 and silently drop all smaller values.
# The sample is strictly positive, so log10 is always defined here.
log_bin_edges = np.histogram_bin_edges(np.log10(lognormal_sample), bins='auto')

fig, ax = plt.subplots()
ax.hist(lognormal_sample, bins=10 ** log_bin_edges)
ax.set_xscale("log")
ax.set_yscale("log")
ax.set_xlabel("Value")
ax.set_ylabel("Count");

In [None]:
# Pairwise correlation between the numeric (and boolean) columns of camp_data.
# Non-numeric columns are excluded explicitly: recent pandas versions no longer
# skip them silently and raise on text columns instead.
camp_data.select_dtypes(include=['number', 'bool']).corr()

In [None]:
# Map "Region_<prefix>" -> list of zip codes, where <prefix> is the first two
# characters of each zip code's string form.
# Every row is assigned to exactly one region by its own prefix, in a single
# pass. Matching rows with startswith(prefix) instead would let a value
# shorter than two characters (e.g. "8") also capture every longer zip that
# begins with it, and would rescan the whole column once per prefix.
zip_prefixes = camp_data['Zip'].map(lambda zip_code: str(zip_code)[:2])
# sort=False keeps the regions in order of first appearance in the data;
# zip codes inside each list keep their original row order.
region_zip_codes = {
    "Region_" + prefix: zips.tolist()
    for prefix, zips in camp_data['Zip'].groupby(zip_prefixes, sort=False)
}

In [None]:
# Create a scatterplot
import seaborn as sns
# Scatter of Cost against Region, one point per row of camp_data.
fig, ax = plt.subplots(figsize=(12, 8))
sns.scatterplot(
    data=camp_data,
    x='Region',
    y='Cost',
    hue='Region',       # `palette` only takes effect when a hue variable is mapped
    palette='viridis',
    legend=False,       # the x axis already identifies each region
    ax=ax,
)
ax.set_title('Scatterplot of Cost by Region')
ax.set_xlabel('Region')
ax.set_ylabel('Cost')
plt.show()

In [None]:
import matplotlib.pyplot as plt
import pandas as pd  # needed for pd.to_numeric below
import seaborn as sns

# Coerce Cost to numeric (unparseable entries become NaN), then impute the
# gaps with the mean of the parseable values. The filled Series is assigned
# back to the column rather than calling fillna(..., inplace=True) on
# camp_data['Cost']: that chained in-place form acts on a temporary Series
# and, under pandas Copy-on-Write, leaves camp_data unchanged.
cost_numeric = pd.to_numeric(camp_data['Cost'], errors='coerce')
mean_cost = cost_numeric.mean()
camp_data['Cost'] = cost_numeric.fillna(mean_cost)

# Regions ordered by mean cost, highest first
order = camp_data.groupby('Region')['Cost'].mean().sort_values(ascending=False).index

# Horizontal bar plot of mean Cost by Region, without error bars.
# NOTE(review): newer seaborn releases spell `ci=None` as `errorbar=None`;
# switch once the minimum supported seaborn version is confirmed.
fig, ax = plt.subplots(figsize=(12, 8))
sns.barplot(data=camp_data, y='Region', x='Cost', ci=None, orient='h', order=order, ax=ax)
ax.set_title('Mean Cost by Region (Highest to Lowest)')
ax.set_xlabel('Mean Cost')
ax.set_ylabel('Region')
plt.show()

In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
# Region values to keep for the Utah-area comparison.
# NOTE(review): these look like two-digit zip-code prefixes rather than state
# codes, and they are strings — confirm this matches how 'Region' is encoded.
utah_and_neighbors = ['84', '83', '82', '80', '87', '85', '88']

# Keep only the rows whose Region appears in the list above
in_selected_regions = camp_data['Region'].isin(utah_and_neighbors)
utah_and_neighbors_data = camp_data[in_selected_regions]

# Box plot of the Cost distribution for each selected Region
plt.figure(figsize=(12, 8))
ax = sns.boxplot(data=utah_and_neighbors_data, x='Region', y='Cost')
ax.set(
    title='Box Plot of Cost by Region in Utah and Bordering States',
    xlabel='Region',
    ylabel='Cost',
)
plt.show()

In [None]:
from wordcloud import WordCloud
# Build one space-separated string of park codes for the word cloud.
# Missing entries are dropped and the rest cast to str first: str.join raises
# TypeError on any non-string item (a missing value is a float NaN).
park_codes = camp_data['Park Code'].dropna().astype(str)
park_code_text = ' '.join(park_codes)
wordcloud = WordCloud(width=800, height=400, background_color='white').generate(park_code_text)

# Render the generated cloud as an image
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')  # hide the axes; only the image is of interest
plt.show()

In [None]:
import matplotlib.pyplot as plt
# Count rows per Park Code and keep the ten most frequent codes
park_code_counts = camp_data['Park Code'].value_counts()
top_park_codes = park_code_counts.nlargest(10)

# Bar chart of those counts, one bar per Park Code
plt.figure(figsize=(12, 6))
ax = top_park_codes.plot.bar()
ax.set(title='Top Park Codes by Frequency', xlabel='Park Code', ylabel='Frequency')
plt.show()