In [15]:
import pandas as pd
import json

## Small-Bodies Dataset (`neos.csv`)

In [3]:
neos = pd.read_csv('data/neos.csv', nrows=3)

In [5]:
len(neos.columns)

75

- `pdes` - the primary designation of the NEO. This is a unique identifier in the database, and its "name" to computer systems.
- `name` - the International Astronomical Union (IAU) name of the NEO. This is its "name" to humans.
- `pha` - whether NASA has marked the NEO as a "Potentially Hazardous Asteroid," roughly meaning that it's large and can come quite close to Earth.
- `diameter` - the NEO's diameter (from an equivalent sphere) in kilometers.

In [6]:
neos_small = pd.read_csv('data/neos.csv', usecols=['pdes', 'name', 'diameter', 'pha'], nrows=3)

In [7]:
neos_small

Unnamed: 0,pdes,name,pha,diameter
0,433,Eros,N,16.84
1,719,Albert,N,
2,887,Alinda,N,4.2


### How many NEOs are in the `neos.csv` data set?

In [8]:
neos_4cols = pd.read_csv('data/neos.csv', usecols=['pdes', 'name', 'diameter', 'pha'])

  neos_4cols = pd.read_csv('data/neos.csv', usecols=['pdes', 'name', 'diameter', 'pha'])


In [9]:
neos_4cols.shape

(23967, 4)

### What is the primary designation of the first Near Earth Object in the `neos.csv` data set?

In [10]:
neos_small.iloc[0, 0]

np.int64(433)

### What is the diameter of the NEO whose name is "Apollo"?

In [11]:
neos_4cols[neos_4cols['name'] == 'Apollo']['diameter']

10    1.5
Name: diameter, dtype: float64

### How many NEOs have IAU names in the data set?

In [13]:
neos_4cols['name'].notnull().sum()

np.int64(343)

### How many NEOs have diameters in the data set?

In [14]:
neos_4cols['diameter'].notnull().sum()

np.int64(1268)

## Close Approach Dataset (`cad.json`)

- `des` - primary designation of the asteroid or comet (e.g., 443, 2000 SG344)
- `orbit_id` - orbit ID
- `jd` - time of close-approach (JD Ephemeris Time)
- `cd` - time of close-approach (formatted calendar date/time, in UTC)
- `dist` - nominal approach distance (au)
- `dist_min` - minimum (3-sigma) approach distance (au)
- `dist_max` - maximum (3-sigma) approach distance (au)
- `v_rel` - velocity relative to the approach body at close approach (km/s)
- `v_inf` - velocity relative to a massless body (km/s)
- `t_sigma_f` - 3-sigma uncertainty in the time of close-approach (formatted in days, hours, and minutes; days are not included if zero; example "13:02" is 13 hours 2 minutes; example "2_09:08" is 2 days 9 hours 8 minutes)
- `h` - absolute magnitude H (mag)

In [16]:
# Read the JSON file
with open('data/cad.json', 'r') as file:
    json_data = json.load(file)

In [18]:
cad = pd.DataFrame(json_data['data'], columns=json_data['fields'])

In [20]:
cad.head(3)

Unnamed: 0,des,orbit_id,jd,cd,dist,dist_min,dist_max,v_rel,v_inf,t_sigma_f,h
0,170903,105,2415020.50766961,1900-Jan-01 00:11,0.0921795123769547,0.0912006569517418,0.0931589328621254,16.7523040362574,16.7505784933163,01:00,18.1
1,2005 OE3,52,2415020.60601349,1900-Jan-01 02:33,0.414975519685102,0.414968315685577,0.414982724454678,17.918395877175,17.9180375373357,< 00:01,20.3
2,2006 XO4,15,2415020.634068074,1900-Jan-01 03:13,0.114291499199114,0.114272705486348,0.114310301346124,7.39720266467069,7.3940503943318,00:23,23.4


In [22]:
cad.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 406785 entries, 0 to 406784
Data columns (total 11 columns):
 #   Column     Non-Null Count   Dtype 
---  ------     --------------   ----- 
 0   des        406785 non-null  object
 1   orbit_id   406785 non-null  object
 2   jd         406785 non-null  object
 3   cd         406785 non-null  object
 4   dist       406785 non-null  object
 5   dist_min   406785 non-null  object
 6   dist_max   406785 non-null  object
 7   v_rel      406785 non-null  object
 8   v_inf      406766 non-null  object
 9   t_sigma_f  406785 non-null  object
 10  h          406277 non-null  object
dtypes: object(11)
memory usage: 34.1+ MB


### How many close approaches are in the `cad.json` data set?

In [21]:
cad.shape

(406785, 11)

### On `January 1st, 2000`, how close did the NEO whose primary designation is `"2015 CL"` pass by Earth?

In [23]:
# Convert 'cd' column to datetime
cad['cd'] = pd.to_datetime(cad['cd'])

# Filter entries with January 1st, 2000 and 'des' equal to "2015 CL"
filtered_cad = cad[(cad['cd'].dt.date == pd.to_datetime('2000-01-01').date()) & (cad['des'] == '2015 CL')]

In [24]:
filtered_cad

Unnamed: 0,des,orbit_id,jd,cd,dist,dist_min,dist_max,v_rel,v_inf,t_sigma_f,h
180844,2015 CL,7,2451544.575085225,2000-01-01 01:48:00,0.144929602021186,0.144894711605919,0.144964493657327,12.0338907050642,12.0323628689746,00:19,25.3


In [29]:
# Round the 'dist' column to 3 decimal places
filtered_cad['dist'].astype(float).round(3)

180844    0.145
Name: dist, dtype: float64

### On `January 1st, 2000`, how fast did the NEO whose primary designation is `"2002 PB"` pass by Earth?

In [30]:
filtered_cad = cad[(cad['cd'].dt.date == pd.to_datetime('2000-01-01').date()) & (cad['des'] == '2002 PB')]

In [31]:
filtered_cad

Unnamed: 0,des,orbit_id,jd,cd,dist,dist_min,dist_max,v_rel,v_inf,t_sigma_f,h
180845,2002 PB,22,2451544.656861311,2000-01-01 03:46:00,0.499221505520251,0.499198890779505,0.499244120869353,29.3862908945476,29.3861092695424,< 00:01,20.5


In [32]:
filtered_cad['v_rel'].astype(float).round(2)

180845    29.39
Name: v_rel, dtype: float64

This concludes our initial analysis of these datasets.