In [54]:
import pandas as pd

In [55]:
meteorite_data = pd.read_csv('meteorites.csv')

In [56]:
meteorite_data.head()

Unnamed: 0,name,id,name_type,class,mass,fall,year,lat,long,geolocation
0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.775,6.08333,"(50.775, 6.08333)"
1,Aarhus,2,Valid,H6,720.0,Fell,1951.0,56.18333,10.23333,"(56.18333, 10.23333)"
2,Abee,6,Valid,EH4,107000.0,Fell,1952.0,54.21667,-113.0,"(54.21667, -113.0)"
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.88333, -99.9)"
4,Achiras,370,Valid,L6,780.0,Fell,1902.0,-33.16667,-64.95,"(-33.16667, -64.95)"


In [57]:
meteorite_data.isnull().sum()

name              0
id                0
name_type         0
class             0
mass            131
fall              0
year            291
lat            7315
long           7315
geolocation    7315
dtype: int64

In [58]:
# Drop rows with missing values
print(meteorite_data.shape)
meteorite_data = meteorite_data.dropna()
print(meteorite_data.shape)

(45716, 10)
(38115, 10)


In [59]:
# Incorrect data points according to kaggle -> https://www.kaggle.com/datasets/nasa/meteorite-landings
meteorite_data = meteorite_data[(meteorite_data['year'] >= 860) & (meteorite_data['year'] <= 2016)]
meteorite_data = meteorite_data[(meteorite_data['long'] <= 180) & (meteorite_data['long'] >= -180)]
meteorite_data = meteorite_data[(meteorite_data['lat'] != 0) & (meteorite_data['lat'] != 0)]
meteorite_data.shape

(31705, 10)

In [60]:
meteorite_data.to_csv('meteorites_clean.csv', index=False)

In [61]:
# Change the year from float to int so that vega lite can recognise it
meteorite_data_clean = pd.read_csv('meteorites_clean.csv')
meteorite_data_clean['year'] = pd.to_numeric(meteorite_data_clean['year'], downcast='unsigned', errors='coerce')
meteorite_data_clean.to_csv('meteorites_clean_2.csv', index=False)

In [62]:
import shapefile

# Load the shapefile
shp_path = 'ne_10m_admin_0_countries.shp'
dbf_path = 'ne_10m_admin_0_countries.dbf'
sf = shapefile.Reader(shp=shp_path, dbf=dbf_path)

# Extract the polygons for each continent
continents = {}
for sr in sf.shapeRecords():
    if sr.record['CONTINENT'] not in continents:
        continents[sr.record['CONTINENT']] = []
    continents[sr.record['CONTINENT']].append(sr.shape.points)

def get_continent(lat, lon):
    for continent, polygons in continents.items():
        for polygon in polygons:
            if is_inside_polygon(lat, lon, polygon):
                return continent
    return None

def is_inside_polygon(lat, lon, polygon):
    num_vertices = len(polygon)
    inside = False
    for i in range(num_vertices):
        j = (i + 1) % num_vertices
        if ((polygon[i][1] > lat) != (polygon[j][1] > lat)) and \
           (lon < (polygon[j][0] - polygon[i][0]) * (lat - polygon[i][1]) / (polygon[j][1] - polygon[i][1]) + polygon[i][0]):
            inside = not inside
    return inside

meteorite_data['Continent'] = meteorite_data.apply(lambda row: get_continent(row['lat'], row['long']), axis=1)

None


In [69]:
print(meteorite_data.shape)
meteorite_data_with_continent = meteorite_data.dropna()
print(meteorite_data_with_continent.shape)

(31705, 11)
(18802, 11)


In [70]:
meteorite_data_with_continent.head()

Unnamed: 0,name,id,name_type,class,mass,fall,year,lat,long,geolocation,Continent
0,Aachen,1,Valid,L5,21.0,Fell,1880.0,50.775,6.08333,"(50.775, 6.08333)",Europe
3,Acapulco,10,Valid,Acapulcoite,1914.0,Fell,1976.0,16.88333,-99.9,"(16.88333, -99.9)",North America
4,Achiras,370,Valid,L6,780.0,Fell,1902.0,-33.16667,-64.95,"(-33.16667, -64.95)",Oceania
5,Adhi Kot,379,Valid,EH4,4239.0,Fell,1919.0,32.1,71.8,"(32.1, 71.8)",Asia
6,Adzhi-Bogdo (stone),390,Valid,LL3-6,910.0,Fell,1949.0,44.83333,95.16667,"(44.83333, 95.16667)",Asia


In [71]:
meteorite_data_with_continent.to_csv('meteorites_clean_with_continent.csv', index=False)

In [72]:
counts = meteorite_data_with_continent['Continent'].value_counts()
print(counts)


Antarctica                 9949
Africa                     2583
North America              2411
Asia                       2320
Oceania                     939
Europe                      501
South America                98
Seven seas (open ocean)       1
Name: Continent, dtype: int64
