In [255]:
import pandas as pd
import numpy as np
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.cluster.hierarchy import linkage, dendrogram
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA

In [256]:
Landings = pd.read_csv("Meteorite_Landings.csv")
Impacts = pd.read_csv("impacts.csv")
Orbit = pd.read_csv("orbits.csv")

In [257]:
Landings.head()

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation
0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.775,6.08333,"(50.775, 6.08333)"
1,Aarhus,2,Valid,H6,720.0,Fell,1951.0,56.18333,10.23333,"(56.18333, 10.23333)"
2,Abee,6,Valid,EH4,107000.0,Fell,1952.0,54.21667,-113.0,"(54.21667, -113.0)"
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.88333, -99.9)"
4,Achiras,370,Valid,L6,780.0,Fell,1902.0,-33.16667,-64.95,"(-33.16667, -64.95)"


In [258]:
Landings.describe()

Unnamed: 0,id,mass (g),year,reclat,reclong
count,45716.0,45585.0,45425.0,38401.0,38401.0
mean,26889.735104,13278.08,1991.828817,-39.12258,61.074319
std,16860.68303,574988.9,25.052766,46.378511,80.647298
min,1.0,0.0,860.0,-87.36667,-165.43333
25%,12688.75,7.2,1987.0,-76.71424,0.0
50%,24261.5,32.6,1998.0,-71.5,35.66667
75%,40656.75,202.6,2003.0,0.0,157.16667
max,57458.0,60000000.0,2101.0,81.16667,354.47333


In [259]:
Landings.isna().sum()

name              0
id                0
nametype          0
recclass          0
mass (g)        131
fall              0
year            291
reclat         7315
reclong        7315
GeoLocation    7315
dtype: int64

In [260]:
Landings["mass (g)"] = Landings["mass (g)"].fillna(0)

In [261]:
Landings.isna().sum()

name              0
id                0
nametype          0
recclass          0
mass (g)          0
fall              0
year            291
reclat         7315
reclong        7315
GeoLocation    7315
dtype: int64

In [262]:
Landings["fall"].value_counts()

Found    44609
Fell      1107
Name: fall, dtype: int64

In [263]:
Landings[Landings["fall"] == "Fell"]

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation
0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.77500,6.08333,"(50.775, 6.08333)"
1,Aarhus,2,Valid,H6,720.0,Fell,1951.0,56.18333,10.23333,"(56.18333, 10.23333)"
2,Abee,6,Valid,EH4,107000.0,Fell,1952.0,54.21667,-113.00000,"(54.21667, -113.0)"
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.90000,"(16.88333, -99.9)"
4,Achiras,370,Valid,L6,780.0,Fell,1902.0,-33.16667,-64.95000,"(-33.16667, -64.95)"
...,...,...,...,...,...,...,...,...,...,...
1106,Zhuanghe,30408,Valid,H5,2900.0,Fell,1976.0,39.66667,122.98333,"(39.66667, 122.98333)"
1107,Zmenj,30411,Valid,Howardite,246.0,Fell,1858.0,51.83333,26.83333,"(51.83333, 26.83333)"
1108,Zomba,30412,Valid,L6,7500.0,Fell,1899.0,-15.18333,35.28333,"(-15.18333, 35.28333)"
1109,Zsadany,30413,Valid,H5,552.0,Fell,1875.0,46.93333,21.50000,"(46.93333, 21.5)"


In [264]:
Impacts.head()

Unnamed: 0,Object Name,Period Start,Period End,Possible Impacts,Cumulative Impact Probability,Asteroid Velocity,Asteroid Magnitude,Asteroid Diameter (km),Cumulative Palermo Scale,Maximum Palermo Scale,Maximum Torino Scale
0,2006 WP1,2017,2017,1,5.2e-09,17.77,28.3,0.007,-8.31,-8.31,0
1,2013 YB,2017,2046,23,7.6e-05,8.98,31.4,0.002,-6.6,-6.96,0
2,2008 US,2017,2062,30,1.6e-05,18.33,31.4,0.002,-6.48,-6.87,0
3,2010 VR139,2017,2076,24,2e-07,4.99,26.7,0.016,-6.83,-6.95,0
4,2015 ME131,2017,2096,85,2.3e-08,19.46,19.2,0.497,-3.85,-4.3,0


In [265]:
rel_plot = px.scatter_matrix(Impacts, dimensions=["Asteroid Velocity", "Asteroid Magnitude", "Asteroid Diameter (km)"], labels={"Asteroid Velocity":"Speed", "Asteroid Magnitude": "Magnitude", "Asteroid Diameter (km)": "Size"}, color="Possible Impacts")
rel_plot.show()

In [266]:
histogram = make_subplots(rows=3, cols=1, subplot_titles=('Possible Impacts','Asteroid Velocity', 'Asteroid Magnitude' ))

histogram.add_trace(go.Histogram(x=Impacts['Possible Impacts'], name="Possible Impacts"), row=1, col=1) 
histogram.add_trace(go.Histogram(x=Impacts['Asteroid Velocity'], name="Asteroid Velocity"), row=2, col=1) 
histogram.add_trace(go.Histogram(x=Impacts['Asteroid Magnitude'], name="Asteroid Magnitude"), row=3, col=1) 
histogram.update_layout(showlegend=False, title_text="Distribution of astroid features", width=1000, height=1000)

histogram.show()


In [267]:
YearsPlot = Impacts[Impacts["Period End"] <= 2199 ]


In [268]:
YearsPlot["Start"] = YearsPlot["Period Start"].apply(str)
YearsPlot["Finish"] = YearsPlot["Period End"].apply(str)



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [269]:
YearsPlot.head()

Unnamed: 0,Object Name,Period Start,Period End,Possible Impacts,Cumulative Impact Probability,Asteroid Velocity,Asteroid Magnitude,Asteroid Diameter (km),Cumulative Palermo Scale,Maximum Palermo Scale,Maximum Torino Scale,Start,Finish
0,2006 WP1,2017,2017,1,5.2e-09,17.77,28.3,0.007,-8.31,-8.31,0,2017,2017
1,2013 YB,2017,2046,23,7.6e-05,8.98,31.4,0.002,-6.6,-6.96,0,2017,2046
2,2008 US,2017,2062,30,1.6e-05,18.33,31.4,0.002,-6.48,-6.87,0,2017,2062
3,2010 VR139,2017,2076,24,2e-07,4.99,26.7,0.016,-6.83,-6.95,0,2017,2076
4,2015 ME131,2017,2096,85,2.3e-08,19.46,19.2,0.497,-3.85,-4.3,0,2017,2096


In [270]:
timeline = px.timeline(YearsPlot,  x_start="Start", x_end="Finish", y="Object Name")
timeline.show()

In [277]:
Impacts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 683 entries, 0 to 682
Data columns (total 11 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Object Name                    683 non-null    object 
 1   Period Start                   683 non-null    int64  
 2   Period End                     683 non-null    int64  
 3   Possible Impacts               683 non-null    int64  
 4   Cumulative Impact Probability  683 non-null    float64
 5   Asteroid Velocity              683 non-null    float64
 6   Asteroid Magnitude             683 non-null    float64
 7   Asteroid Diameter (km)         683 non-null    float64
 8   Cumulative Palermo Scale       683 non-null    float64
 9   Maximum Palermo Scale          683 non-null    float64
 10  Maximum Torino Scale           683 non-null    object 
dtypes: float64(6), int64(3), object(2)
memory usage: 58.8+ KB


In [271]:
Fallen = Landings[(Landings["fall"] == "Fell") & (Landings["mass (g)"] > 0)]
Fallen = Fallen.sort_values("year")

In [272]:
scatter_3d_no_size = px.scatter_3d(Fallen, x='year', y='reclat', z='reclong',
              color='recclass')
scatter_3d_no_size.show()

In [273]:
scatter_3d_size = px.scatter_3d(Fallen, x='year', y='reclat', z='reclong', size="mass (g)",
              color='recclass') 
scatter_3d_size.show()

In [274]:
Fallen["year"] = Fallen["year"].apply(int)

In [275]:
map_plot = px.scatter_geo(Fallen, lat=Fallen.reclat,
                    lon=Fallen.reclong,
                    color="recclass",
                    hover_name="name",
                    # size="mass (g)",
                    animation_frame="year",
                     projection="orthographic")
map_plot.show()

In [276]:
import streamlit as st
st.plotly_chart(rel_plot, use_container_width=True)

DeltaGenerator(_root_container=0, _provided_cursor=None, _parent=None, _block_type=None, _form_data=None)

In [278]:
Impacts.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 683 entries, 0 to 682
Data columns (total 11 columns):
 #   Column                         Non-Null Count  Dtype  
---  ------                         --------------  -----  
 0   Object Name                    683 non-null    object 
 1   Period Start                   683 non-null    int64  
 2   Period End                     683 non-null    int64  
 3   Possible Impacts               683 non-null    int64  
 4   Cumulative Impact Probability  683 non-null    float64
 5   Asteroid Velocity              683 non-null    float64
 6   Asteroid Magnitude             683 non-null    float64
 7   Asteroid Diameter (km)         683 non-null    float64
 8   Cumulative Palermo Scale       683 non-null    float64
 9   Maximum Palermo Scale          683 non-null    float64
 10  Maximum Torino Scale           683 non-null    object 
dtypes: float64(6), int64(3), object(2)
memory usage: 58.8+ KB


In [281]:
max(Impacts["Maximum Torino Scale"])

'0'