In [1]:
import pandas as pd
import altair as alt

In [2]:
df = pd.read_csv ('popularity.csv')
df.head()

Unnamed: 0,breed,2013,2014,2015,2016,2017,2018,2019,2020,2021,2022,Intelligence_by_obeyrate,size_by_avgheight,price
0,French Bulldog,0,0,0,0,2,2,2,4,4,5,30%,11.5,2500
1,Labrador Retriever,5,5,5,5,5,5,5,5,5,4,95%,22.5,1725
2,Golden Retriever,3,3,3,3,3,3,3,2,3,3,95%,22.5,2000
3,German Shepherd Dog,4,4,4,4,4,4,4,3,2,2,95%,24.0,2500
4,Poodle,0,0,0,0,0,0,0,0,1,1,95%,20.0,1000


In [3]:
df_melted = pd.melt(df, id_vars=['breed', 'Intelligence_by_obeyrate', 'size_by_avgheight', 'price'], var_name='year', value_name='popularity')

In [4]:
df_melted.head()

Unnamed: 0,breed,Intelligence_by_obeyrate,size_by_avgheight,price,year,popularity
0,French Bulldog,30%,11.5,2500,2013,0
1,Labrador Retriever,95%,22.5,1725,2013,5
2,Golden Retriever,95%,22.5,2000,2013,3
3,German Shepherd Dog,95%,24.0,2500,2013,4
4,Poodle,95%,20.0,1000,2013,0


In [8]:
df1 = pd.read_csv ('NYC_Dog_Licensing_Dataset.csv')
df1.head()

Unnamed: 0,AnimalName,AnimalGender,AnimalBirthYear,BreedName,ZipCode,LicenseIssuedDate,LicenseExpiredDate,Extract Year
0,PAIGE,F,2014,American Pit Bull Mix / Pit Bull Mix,10035.0,09/12/2014,09/12/2017,2016
1,YOGI,M,2010,Boxer,10465.0,09/12/2014,10/02/2017,2016
2,ALI,M,2014,Basenji,10013.0,09/12/2014,09/12/2019,2016
3,QUEEN,F,2013,Akita Crossbreed,10013.0,09/12/2014,09/12/2017,2016
4,LOLA,F,2009,Maltese,10028.0,09/12/2014,10/09/2017,2016


In [9]:
breed_list = ['French Bulldog', 'Labrador Retriever', 'Golden Retriever', 'German Shepherd Dog', 'Poodle', 'Beagle', 'Bulldog']

# Filter the dataframe to include only the selected breeds
filtered_df = df1[df1['BreedName'].isin(breed_list)]

# Convert the 'LicenseIssuedDate' column to datetime format and extract the year
filtered_df['LicenseIssuedYear'] = pd.to_datetime(filtered_df['LicenseIssuedDate']).dt.year

# Group the filtered dataframe by year and breed, and count the number of licenses
breeds_per_year = filtered_df.groupby(['LicenseIssuedYear', 'BreedName']).size().reset_index(name='LicenseCount')

# Rename the columns
breeds_per_year = breeds_per_year.rename(columns={'LicenseIssuedYear': 'Year', 'BreedName': 'Breed'})

# Print the resulting DataFrame
breeds_per_year.head()

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  filtered_df['LicenseIssuedYear'] = pd.to_datetime(filtered_df['LicenseIssuedDate']).dt.year


Unnamed: 0,Year,Breed,LicenseCount
0,2014,Beagle,38
1,2014,Bulldog,2
2,2014,French Bulldog,9
3,2014,German Shepherd Dog,26
4,2014,Golden Retriever,29


In [10]:
breeds_per_year['Year'] = breeds_per_year['Year'].astype('str')
merged_df = pd.merge(breeds_per_year, df_melted, left_on=['Breed', 'Year'], right_on=['breed', 'year'])

# Print the resulting merged dataframe
merged_df.head(7)

Unnamed: 0,Year,Breed,LicenseCount,breed,Intelligence_by_obeyrate,size_by_avgheight,price,year,popularity
0,2014,Beagle,38,Beagle,10%,14.5,1350,2014,1
1,2014,Bulldog,2,Bulldog,10%,14.0,2000,2014,2
2,2014,French Bulldog,9,French Bulldog,30%,11.5,2500,2014,0
3,2014,German Shepherd Dog,26,German Shepherd Dog,95%,24.0,2500,2014,4
4,2014,Golden Retriever,29,Golden Retriever,95%,22.5,2000,2014,3
5,2014,Labrador Retriever,81,Labrador Retriever,95%,22.5,1725,2014,5
6,2014,Poodle,17,Poodle,95%,20.0,1000,2014,0


In [11]:
import altair as alt
import pandas as pd

# Define color scales
color_scale = alt.Scale(
    domain=['Beagle', 'Bulldog', 'French Bulldog', 'German Shepherd Dog', 'Golden Retriever','Labrador Retriever','Poodle'],
    range=['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2']
)
color = alt.Color('breed:N', scale=color_scale)

# Top scatter plot
breed_selector = alt.selection_multi(fields=['breed'], bind='legend')
brush = alt.selection(type='interval', encodings=['x'])
points = alt.Chart(merged_df).mark_point().encode(
    x=alt.X('year:O', title='Year'),
    y=alt.Y('popularity:Q', title='Popularity'),
    color=alt.condition(breed_selector, color, alt.value('lightgray')),
    size=alt.Size('LicenseCount:Q', title='License Count')
).properties(
    width=600,
    height=400
).add_selection(
    brush, breed_selector
)

# Bottom stacked bar chart
bars = alt.Chart(merged_df).mark_bar().encode(
    x=alt.X('sum(LicenseCount):Q', stack='zero', title='License Count'),
    y=alt.Y('year:O', title='Year'),
    color=alt.Color('breed:N', scale=color_scale)
).properties(
    width=600,
    height=100
).transform_filter(
    brush
).add_selection(
    breed_selector
)

chart = alt.vconcat(points, bars, data=merged_df)
chart1 = chart
chart1

In [12]:
# Load the data
data = pd.DataFrame({
    "breed": ["French Bulldog", "Labrador Retriever", "Golden Retriever", "German Shepherd Dog", "Poodle", "Beagle", "Bulldog"],
    "popularity": [8, 7, 6, 5, 4, 3, 1],
    "Intelligence_by_obeyrate": ["30%", "95%", "95%", "95%", "95%", "10%", "10%"],
    "size_by_avgheight": [11.5, 22.5, 22.5, 24.0, 20.0, 14.5, 14],
    "price": [2500, 1725, 2000, 2500, 1000, 1350, 2000]
})

# Define selection for breed dropdown
breed_dropdown = alt.binding_select(options=data["breed"].tolist(), name="Breed: ")
breed_selection = alt.selection_single(fields=["breed"], bind=breed_dropdown, name="SelectedBreed")

# Define base chart
base_chart = alt.Chart(data).add_selection(breed_selection)

# Size vs. Popularity chart
size_pop_chart = base_chart.mark_circle(size=100).encode(
    x=alt.X("size_by_avgheight:Q", title="Size by Avg. Height"),
    y=alt.Y("popularity:O", title="Popularity"),
    color=alt.condition(breed_selection, alt.Color("breed:N", legend=None), alt.value("lightgray"))
).properties(title="Size vs. Popularity")

# Intelligence vs. Popularity chart
intelligence_pop_chart = base_chart.mark_circle(size=100).encode(
    x=alt.X("Intelligence_by_obeyrate:O", title="Intelligence by Obey Rate"),
    y=alt.Y("popularity:O", title="Popularity"),
    color=alt.condition(breed_selection, alt.Color("breed:N", legend=None), alt.value("lightgray"))
).properties(title="Intelligence vs. Popularity")

# Price vs. Popularity chart
price_pop_chart = base_chart.mark_circle(size=100).encode(
    x=alt.X("price:Q", title="Price"),
    y=alt.Y("popularity:O", title="Popularity"),
    color=alt.condition(breed_selection, alt.Color("breed:N", legend=None), alt.value("lightgray"))
).properties(title="Price vs. Popularity")

# Combine all charts
charts = size_pop_chart | intelligence_pop_chart | price_pop_chart

# Show the chart
charts
chart2 = charts
chart2

In [13]:
top_names = df1["AnimalName"].value_counts().nlargest(12)

# Create a DataFrame for the top names and their counts
name_counts = pd.DataFrame({
    "name": top_names.index,
    "count": top_names.values
})

# Create the bar chart
chart = alt.Chart(name_counts).mark_bar().encode(
    x=alt.X("count:Q", axis=alt.Axis(title="Count")),
    y=alt.Y("name:N", sort="-x", axis=alt.Axis(title="Name")),
    tooltip=["name", "count"]
).properties(
    title="Top 10 Most Common Dog Names"
)

chart

In [15]:
df2 = pd.read_csv('DOHMH_Dog_Bite_Data.csv')
df2 = df2.dropna(subset=['ZipCode'])
df2

Unnamed: 0,UniqueID,DateOfBite,Species,Breed,Age,Gender,SpayNeuter,Borough,ZipCode
0,1,January 01 2018,DOG,UNKNOWN,,U,False,Brooklyn,11220
2,3,January 06 2018,DOG,Pit Bull,,U,False,Brooklyn,11224
3,4,January 08 2018,DOG,Mixed/Other,4,M,False,Brooklyn,11231
4,5,January 09 2018,DOG,Pit Bull,,U,False,Brooklyn,11224
5,6,January 03 2018,DOG,BASENJI,4Y,M,False,Brooklyn,11231
...,...,...,...,...,...,...,...,...,...
22658,10276,December 24 2017,DOG,CHIWEENIE MIX,7,M,True,Staten Island,10303
22659,10277,December 24 2017,DOG,DUNKER,5,F,True,Staten Island,10303
22660,10278,December 21 2017,DOG,"Schnauzer, Miniature",10M,M,True,Staten Island,10312
22661,10279,December 28 2017,DOG,Mixed/Other,,F,False,Staten Island,10308


In [None]:
import json

# Aggregate by zipcode and breed
zipcode_breed_counts = (
    df2.groupby(['ZipCode', 'Breed'])
    .size()
    .reset_index(name="count")
    .sort_values(["ZipCode", "count"], ascending=[True, False])
    .groupby("ZipCode")
    .head(10)
)

# Load the GeoJSON data for NYC zipcodes
with open("nyc-zip-code-tabulation-areas-polygons.geojson") as f:
    geojson_data = json.load(f)

# Convert breed names to title case for better display
zipcode_breed_counts["Breed"] = zipcode_breed_counts["Breed"].str.title()

# Create a map of NYC zipcodes with the number of dog bites
map = (
    alt.Chart(alt.Data(values=geojson_data))
    .mark_geoshape(stroke="white", strokeWidth=0.5)
    .encode(
        color=alt.Color(
            "count:Q",
            scale=alt.Scale(scheme="reds"),
            legend=alt.Legend(title="Number of Dog Bites"),
        ),
        tooltip=["properties.postalCode:N", "count:Q"],
    )
    .transform_lookup(
        lookup="properties.postalCode",
        from_=alt.LookupData(zipcode_breed_counts, "ZipCode", ["count"]),
    )
    .properties(width=500, height=300)
    .project("identity")
)

# Create a chart of the top 10 dog breeds with the most bites
chart = (
    alt.Chart(zipcode_breed_counts.head(10))
    .mark_bar()
    .encode(
        x=alt.X("count:Q", title="Number of Dog Bites"),
        y=alt.Y("Breed:N", title="Dog Breed"),
        tooltip=["Breed:N", "count:Q"],
    )
)

# Combine the map and chart
map
chart

In [None]:
!pip install geopandas

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting geopandas
  Downloading geopandas-0.12.2-py3-none-any.whl (1.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.1/1.1 MB[0m [31m28.6 MB/s[0m eta [36m0:00:00[0m
Collecting fiona>=1.8
  Downloading Fiona-1.9.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (16.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m16.0/16.0 MB[0m [31m47.9 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting pyproj>=2.6.1.post1
  Downloading pyproj-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.7 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.7/7.7 MB[0m [31m106.6 MB/s[0m eta [36m0:00:00[0m
Collecting cligj>=0.5
  Downloading cligj-0.7.2-py3-none-any.whl (7.1 kB)
Collecting munch>=2.3.2
  Downloading munch-2.5.0-py2.py3-none-any.whl (10 kB)
Collecting click-plugins>=1.0
  Downloading click_plugins-1.1

In [None]:
gb = df2.groupby(['ZipCode']).size().reset_index(name='Bites')

In [None]:
gb

Unnamed: 0,ZipCode,Bites
0,01013,1
1,01720,1
2,01852,1
3,02301,1
4,02631,1
...,...,...
514,90027,1
515,90066,1
516,94128,1
517,94591,1


In [None]:
bitesnyc = gb[(gb['ZipCode']>='10001')&(gb['ZipCode']<='11697')]

In [None]:
bitesnyc

Unnamed: 0,ZipCode,Bites
47,10001,30
48,10002,116
49,10003,101
50,10004,16
51,10005,17
...,...,...
327,11691,124
328,11692,53
329,11693,32
330,11694,46


In [None]:
!pip install folium

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
import folium

In [None]:
import geopandas as gpd

In [None]:
zipcode = gpd.read_file('ZIP_CODE_040114.shp')

In [None]:
zipcode['Latitude'] = zipcode['geometry'].centroid.y
zipcode['Longitude'] = zipcode['geometry'].centroid.x

In [None]:
zipcode

Unnamed: 0,geometry,Latitude,Longitude
0,"POLYGON ((1038098.252 188138.380, 1038141.936 ...",185580.221914,1.040696e+06
1,"POLYGON ((1001613.713 186926.440, 1002314.243 ...",183803.113142,1.001913e+06
2,"POLYGON ((1011174.276 183696.338, 1011373.584 ...",180768.944857,1.008365e+06
3,"POLYGON ((995908.365 183617.613, 996522.848 18...",180870.353320,9.970232e+05
4,"POLYGON ((991997.113 176307.496, 992042.798 17...",173656.048379,9.907853e+05
...,...,...,...
258,"POLYGON ((950767.507 172848.969, 950787.510 17...",170046.546303,9.521543e+05
259,"POLYGON ((1028453.995 167153.410, 1027813.010 ...",168862.386996,1.028656e+06
260,"POLYGON ((995877.318 203206.075, 995968.511 20...",199049.367644,9.945540e+05
261,"POLYGON ((997731.761 219560.922, 997641.948 21...",219676.996951,9.977352e+05


In [None]:
!pip install geopy

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


In [None]:
from geopy.geocoders import Nominatim
geolocator = Nominatim(user_agent='myapp')

In [None]:
zip_codes = bitesnyc['ZipCode'].tolist()

In [None]:
coords = []
for zip_code in zip_codes:
    location = geolocator.geocode(zip_code + ' New York City')
    if location is not None:
        coords.append([zip_code, location.latitude, location.longitude])



In [None]:
df_coords = pd.DataFrame(coords, columns=['ZipCode', 'Latitude', 'Longitude'])

In [None]:
df_coords

Unnamed: 0,ZipCode,Latitude,Longitude
0,10001,40.712728,-74.006015
1,10002,40.712728,-74.006015
2,10003,40.712728,-74.006015
3,10004,40.712728,-74.006015
4,10005,40.712728,-74.006015
...,...,...,...
277,11691,40.601575,-73.757753
278,11692,40.593842,-73.796725
279,11693,40.598996,-73.817981
280,11694,40.577418,-73.846741


In [None]:
df_merged = pd.merge(bitesnyc, df_coords, on='ZipCode')

In [None]:
df_merged

Unnamed: 0,ZipCode,Bites,Latitude,Longitude
0,10001,30,40.712728,-74.006015
1,10002,116,40.712728,-74.006015
2,10003,101,40.712728,-74.006015
3,10004,16,40.712728,-74.006015
4,10005,17,40.712728,-74.006015
...,...,...,...,...
277,11691,124,40.601575,-73.757753
278,11692,53,40.593842,-73.796725
279,11693,32,40.598996,-73.817981
280,11694,46,40.577418,-73.846741


In [None]:
nyc_coords = [40.730610, -73.935242]
m = folium.Map(location=nyc_coords, zoom_start=11)

In [None]:
def get_marker_properties(bites):
    if bites >= 100:
        return {'fillColor': '#d73027', 'color': '#d73027', 'radius': 15}
    elif bites >= 50:
        return {'fillColor': '#fc8d59', 'color': '#fc8d59', 'radius': 10}
    else:
        return {'fillColor': '#fee08b', 'color': '#fee08b', 'radius': 5}

# Add a marker for each zip code
for i, row in df_merged.iterrows():
    props = get_marker_properties(row['Bites'])
    folium.Marker(location=[row['Latitude'], row['Longitude']], 
                  popup=f"Zip Code: {row['ZipCode']}<br>Bites: {row['Bites']}", 
                  **props).add_to(m)

# Save the map to an HTML file
m.save('map.html')

In [16]:
# Count the number of bites by dog breed
breed_counts = df2.groupby('Breed')['UniqueID'].count().reset_index()
breed_counts.columns = ['Breed', 'BiteCount']

# Sort the breeds by the number of bites in descending order
breed_counts = breed_counts.sort_values('BiteCount', ascending=False)

# Get the 10 most and 10 least bites dogs
top_10 = breed_counts.head(10)
bottom_10 = breed_counts.tail(10)

# Concatenate the dataframes
combined_df = pd.concat([top_10, bottom_10])

# Create the Altair chart
chart = alt.Chart(combined_df).mark_bar().encode(
    x='BiteCount:Q',
    y=alt.Y('Breed:N', sort='-x'),
    color=alt.condition(
        alt.datum.BiteCount > 0,
        alt.value('steelblue'),  
        alt.value('lightgray')  
    ),
    tooltip=['Breed', 'BiteCount']
).properties(
    width=800,
    height=400
)

# Show the chart
chart
chart3 = chart
chart3

In [None]:
chartsall = chart1 | chart2 | chart3

In [None]:
chartsall.save("all.html")

In [17]:
chart1.save('chart1.html')
chart2.save('chart2.html')
chart3.save('chart3.html')

In [None]:
# save charts to separate HTML files
chart1.save('chart1.html')
chart2.save('chart2.html')
chart3.save('chart3.html')

# create a combined HTML file with buttons to show each chart
html = f'''
<!DOCTYPE html>
<html>
<head>
  <script src="https://cdn.jsdelivr.net/npm/vega@5"></script>
  <script src="https://cdn.jsdelivr.net/npm/vega-lite@5"></script>
  <script src="https://cdn.jsdelivr.net/npm/vega-embed@6"></script>
  <script type="text/javascript">
    function showChart(chartNum) {{
      var chartDivs = document.getElementsByClassName('chart');
      for (var i = 0; i < chartDivs.length; i++) {{
        chartDivs[i].style.display = 'none';
      }}
      document.getElementById('chart' + chartNum).style.display = 'block';
    }}
  </script>
</head>
<body>
  <button onclick="showChart(1)">Chart 1</button>
  <button onclick="showChart(2)">Chart 2</button>
  <button onclick="showChart(3)">Chart 3</button>
  <div id="chart1" class="chart"></div>
  <div id="chart2" class="chart" style="display:none"></div>
  <div id="chart3" class="chart" style="display:none"></div>
  <script type="text/javascript">
    vegaEmbed('#chart1', {chart1.to_dict()}, {{mode: "vega-lite"}});
    vegaEmbed('#chart2', {chart2.to_dict()}, {{mode: "vega-lite"}});
    vegaEmbed('#chart3', {chart3.to_dict()}, {{mode: "vega-lite"}});
  </script>
</body>
</html>
'''

# save the combined HTML file
with open('charts.html', 'w') as f:
    f.write(html)