In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import requests
import gmaps
import os

# Import API key
from config import gkey

In [2]:
# Read Meteorite_Landings.csv file
meteorite_landings_df = pd.read_csv('Meteorite_Landings.csv')
meteorite_landings_df.head(10)

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation
0,Aachen,1,Valid,L5,21.0,Fell,01/01/1880 12:00:00 AM,50.775,6.08333,"(50.775, 6.08333)"
1,Aarhus,2,Valid,H6,720.0,Fell,01/01/1951 12:00:00 AM,56.18333,10.23333,"(56.18333, 10.23333)"
2,Abee,6,Valid,EH4,107000.0,Fell,01/01/1952 12:00:00 AM,54.21667,-113.0,"(54.21667, -113.0)"
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,01/01/1976 12:00:00 AM,16.88333,-99.9,"(16.88333, -99.9)"
4,Achiras,370,Valid,L6,780.0,Fell,01/01/1902 12:00:00 AM,-33.16667,-64.95,"(-33.16667, -64.95)"
5,Adhi Kot,379,Valid,EH4,4239.0,Fell,01/01/1919 12:00:00 AM,32.1,71.8,"(32.1, 71.8)"
6,Adzhi-Bogdo (stone),390,Valid,LL3-6,910.0,Fell,01/01/1949 12:00:00 AM,44.83333,95.16667,"(44.83333, 95.16667)"
7,Agen,392,Valid,H5,30000.0,Fell,01/01/1814 12:00:00 AM,44.21667,0.61667,"(44.21667, 0.61667)"
8,Aguada,398,Valid,L6,1620.0,Fell,01/01/1930 12:00:00 AM,-31.6,-65.23333,"(-31.6, -65.23333)"
9,Aguila Blanca,417,Valid,L,1440.0,Fell,01/01/1920 12:00:00 AM,-30.86667,-64.55,"(-30.86667, -64.55)"


In [3]:
# Count the row number of the dataset
len(meteorite_landings_df)

45716

In [4]:
# Modify the dataframe to grab "year" - split with " " space
year_split = meteorite_landings_df["year"].str.split(" ", n = 1, expand = True) 
year_split.head()

Unnamed: 0,0,1
0,01/01/1880,12:00:00 AM
1,01/01/1951,12:00:00 AM
2,01/01/1952,12:00:00 AM
3,01/01/1976,12:00:00 AM
4,01/01/1902,12:00:00 AM


In [5]:
# Modify the year_split to grab only "year" data - split with "/"
date_split = year_split[0].str.split("/", n = 2, expand = True) 
date_split.head()

Unnamed: 0,0,1,2
0,1,1,1880
1,1,1,1951
2,1,1,1952
3,1,1,1976
4,1,1,1902


In [6]:
# Name the date_split[2] to "Year", and add the data column to meteorite_landings dataframe
meteorite_landings_df["Year"]= date_split[2]
meteorite_landings_df.head()

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,year,reclat,reclong,GeoLocation,Year
0,Aachen,1,Valid,L5,21.0,Fell,01/01/1880 12:00:00 AM,50.775,6.08333,"(50.775, 6.08333)",1880
1,Aarhus,2,Valid,H6,720.0,Fell,01/01/1951 12:00:00 AM,56.18333,10.23333,"(56.18333, 10.23333)",1951
2,Abee,6,Valid,EH4,107000.0,Fell,01/01/1952 12:00:00 AM,54.21667,-113.0,"(54.21667, -113.0)",1952
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,01/01/1976 12:00:00 AM,16.88333,-99.9,"(16.88333, -99.9)",1976
4,Achiras,370,Valid,L6,780.0,Fell,01/01/1902 12:00:00 AM,-33.16667,-64.95,"(-33.16667, -64.95)",1902


In [7]:
# Delete 'year' column and use 'Year' column for data analyses
del meteorite_landings_df["year"]
meteorite_landings_df.head()

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,reclat,reclong,GeoLocation,Year
0,Aachen,1,Valid,L5,21.0,Fell,50.775,6.08333,"(50.775, 6.08333)",1880
1,Aarhus,2,Valid,H6,720.0,Fell,56.18333,10.23333,"(56.18333, 10.23333)",1951
2,Abee,6,Valid,EH4,107000.0,Fell,54.21667,-113.0,"(54.21667, -113.0)",1952
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,16.88333,-99.9,"(16.88333, -99.9)",1976
4,Achiras,370,Valid,L6,780.0,Fell,-33.16667,-64.95,"(-33.16667, -64.95)",1902


In [8]:
# Sort the dataset with Year
sort_meteorite_landings_df = meteorite_landings_df.sort_values(["Year"], ascending=True)
sort_meteorite_landings_df.head(20)

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,reclat,reclong,GeoLocation,Year
704,Nogata,16988,Valid,L6,472.0,Fell,33.725,130.75,"(33.725, 130.75)",860
679,Narni,16914,Valid,Stone-uncl,,Fell,42.51667,12.51667,"(42.51667, 12.51667)",920
278,Elbogen,7823,Valid,"Iron, IID",107000.0,Fell,50.18333,12.73333,"(50.18333, 12.73333)",1399
856,Rivolta de Bassi,22614,Valid,Stone-uncl,103.3,Fell,45.48333,9.51667,"(45.48333, 9.51667)",1490
283,Ensisheim,10039,Valid,LL6,127000.0,Fell,47.86667,7.35,"(47.86667, 7.35)",1491
1043,Valdinoce,24146,Valid,Stone-uncl,,Fell,44.06667,12.1,"(44.06667, 12.1)",1495
730,Oliva-Gandia,18012,Valid,Stone-uncl,,Fell,39.0,-0.03333,"(39.0, -0.03333)",1519
5365,Campo del Cielo,5247,Valid,"Iron, IAB-MG",50000000.0,Found,-27.46667,-60.58333,"(-27.46667, -60.58333)",1575
174,Castrovillari,5295,Valid,Stone-uncl,15000.0,Fell,39.8,16.2,"(39.8, 16.2)",1583
26174,Morito,16745,Valid,"Iron, IIIAB",10100000.0,Found,27.05,-105.43333,"(27.05, -105.43333)",1600


In [9]:
# Set up a bin to create graphs based on Year
# But first, check data types
sort_meteorite_landings_df.dtypes

name            object
id               int64
nametype        object
recclass        object
mass (g)       float64
fall            object
reclat         float64
reclong        float64
GeoLocation     object
Year            object
dtype: object

In [40]:
# Change Year datatypes to integer
sort_meteorite_landings_df["Year"] = pd.to_numeric(sort_meteorite_landings_df["Year"])
sort_meteorite_landings_df.dtypes

name             object
id                int64
nametype         object
recclass         object
mass (g)        float64
fall             object
reclat          float64
reclong         float64
GeoLocation      object
Year            float64
Year Range     category
dtype: object

In [28]:
# Drop missing data row
clean_meteorite_landings_df = sort_meteorite_landings_df.dropna(how="any")

In [29]:
# Groupby the dataset with 'fall' and 'found' meteorites
#fall_meteorite_df = clean_meteorite_landings_df.groupby("fall")
#fall_meteorite_df.head()

In [41]:
# Set up a new data frame for Data Analyses
#year_data_df = pd.DataFrame({"Mass": [mass (g)], "Latitude": [reclat], "Longitude": [reclong], "Year": [Year]})

In [34]:
# Set up bins for Years
year_bins = [0, 999.999, 1699.999, 1799.999, 1899.999, 1999.999, 2020]
group_names = ["<1000", "1000-1699", "1700-1799", "1800-1899", "1900-1999", "2000-2020"]

In [35]:
# Add a new column named Year Ramge amd binnning based off the range
year_data_df["Year Range"] = pd.cut(year_data_df["Year"], year_bins, labels=group_names)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  


In [36]:
del year_data_df["Year"]
year_data_df

Unnamed: 0,name,id,nametype,recclass,mass (g),fall,reclat,reclong,GeoLocation,Year Range
704,Nogata,16988,Valid,L6,472.0,Fell,33.72500,130.75000,"(33.725, 130.75)",<1000
278,Elbogen,7823,Valid,"Iron, IID",107000.0,Fell,50.18333,12.73333,"(50.18333, 12.73333)",1000-1699
856,Rivolta de Bassi,22614,Valid,Stone-uncl,103.3,Fell,45.48333,9.51667,"(45.48333, 9.51667)",1000-1699
283,Ensisheim,10039,Valid,LL6,127000.0,Fell,47.86667,7.35000,"(47.86667, 7.35)",1000-1699
5365,Campo del Cielo,5247,Valid,"Iron, IAB-MG",50000000.0,Found,-27.46667,-60.58333,"(-27.46667, -60.58333)",1000-1699
...,...,...,...,...,...,...,...,...,...,...
30776,Northwest Africa 7857,57422,Valid,LL6,246.0,Found,0.00000,0.00000,"(0.0, 0.0)",2000-2020
30775,Northwest Africa 7856,57421,Valid,LL6,517.0,Found,0.00000,0.00000,"(0.0, 0.0)",2000-2020
30774,Northwest Africa 7855,57420,Valid,H4,916.0,Found,0.00000,0.00000,"(0.0, 0.0)",2000-2020
30762,Northwest Africa 7812,57258,Valid,Angrite,46.2,Found,0.00000,0.00000,"(0.0, 0.0)",2000-2020
