In [1]:
# Goal: determine whether a meteor shower can be observed from a specific city.
# If it can, find the optimal date, direction, and time to observe it.

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Load the four source tables; each CSV becomes its own DataFrame.
# The files are read in the order listed and unpacked onto the matching names.
meteor_showers, moon_phases, constellations, cities = (
    pd.read_csv(csv_path)
    for csv_path in (
        'data/meteorshowers.csv',
        'data/moonphases.csv',
        'data/constellations.csv',
        'data/cities.csv',
    )
)

In [4]:
meteor_showers.head()

Unnamed: 0,name,radiant,bestmonth,startmonth,startday,endmonth,endday,hemisphere,preferredhemisphere
0,Lyrids,Lyra,april,april,21,april,22,northern,northern
1,Eta Aquarids,Aquarius,may,april,19,may,28,"northern, southern",southern
2,Orionids,Orion,october,october,2,november,7,"northern, southern","northern, southern"
3,Perseids,Perseus,august,july,14,august,24,northern,northern
4,Leonids,Leo,november,november,6,november,30,"northern, southern","northern, southern"


In [5]:
moon_phases.head()

Unnamed: 0,month,day,moonphase,specialevent
0,january,1,,
1,january,2,first quarter,
2,january,3,,
3,january,4,,
4,january,5,,


In [6]:
constellations.head()

Unnamed: 0,constellation,bestmonth,latitudestart,latitudeend,besttime,hemisphere
0,Lyra,august,90,-40,21:00,northern
1,Aquarius,october,65,-90,21:00,southern
2,Orion,january,85,-75,21:00,northern
3,Perseus,december,90,-35,21:00,northern
4,Leo,april,90,65,21:00,northern


In [7]:
cities.head()

Unnamed: 0,city,latitude,country
0,Abu Dhabi,24.47,United Arab Emirates
1,Abuja,9.07,Nigeria
2,Accra,5.55,Ghana
3,Adamstown,-25.07,Pitcairn Islands
4,Addis Ababa,9.02,Ethiopia


In [8]:
meteor_showers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   name                 5 non-null      object
 1   radiant              5 non-null      object
 2   bestmonth            5 non-null      object
 3   startmonth           5 non-null      object
 4   startday             5 non-null      int64 
 5   endmonth             5 non-null      object
 6   endday               5 non-null      int64 
 7   hemisphere           5 non-null      object
 8   preferredhemisphere  5 non-null      object
dtypes: int64(2), object(7)
memory usage: 492.0+ bytes


In [9]:
moon_phases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   month         366 non-null    object
 1   day           366 non-null    int64 
 2   moonphase     50 non-null     object
 3   specialevent  10 non-null     object
dtypes: int64(1), object(3)
memory usage: 11.6+ KB


In [10]:
constellations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   constellation  5 non-null      object
 1   bestmonth      5 non-null      object
 2   latitudestart  5 non-null      int64 
 3   latitudeend    5 non-null      int64 
 4   besttime       5 non-null      object
 5   hemisphere     5 non-null      object
dtypes: int64(2), object(4)
memory usage: 372.0+ bytes


In [11]:
cities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 256 entries, 0 to 255
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   city      256 non-null    object 
 1   latitude  256 non-null    float64
 2   country   256 non-null    object 
dtypes: float64(1), object(2)
memory usage: 6.1+ KB


In [12]:
meteor_showers.describe()

Unnamed: 0,startday,endday
count,5.0,5.0
mean,12.4,22.2
std,8.203658,9.066422
min,2.0,7.0
25%,6.0,22.0
50%,14.0,24.0
75%,19.0,28.0
max,21.0,30.0


In [13]:
moon_phases.describe()

Unnamed: 0,day
count,366.0
mean,15.756831
std,8.823592
min,1.0
25%,8.0
50%,16.0
75%,23.0
max,31.0


In [14]:
constellations.describe()

Unnamed: 0,latitudestart,latitudeend
count,5.0,5.0
mean,84.0,-35.0
std,10.839742,60.518592
min,65.0,-90.0
25%,85.0,-75.0
50%,90.0,-40.0
75%,90.0,-35.0
max,90.0,65.0


In [15]:
cities.describe()

Unnamed: 0,latitude
count,256.0
mean,16.74082
std,26.077121
min,-54.27
25%,0.3675
50%,15.48
75%,39.8175
max,78.2


In [16]:
moon_phases.isna().sum()

month             0
day               0
moonphase       316
specialevent    356
dtype: int64

# Data processing

### Change column dtype (month name)

In [17]:
# check the type of data
meteor_showers.head()

Unnamed: 0,name,radiant,bestmonth,startmonth,startday,endmonth,endday,hemisphere,preferredhemisphere
0,Lyrids,Lyra,april,april,21,april,22,northern,northern
1,Eta Aquarids,Aquarius,may,april,19,may,28,"northern, southern",southern
2,Orionids,Orion,october,october,2,november,7,"northern, southern","northern, southern"
3,Perseids,Perseus,august,july,14,august,24,northern,northern
4,Leonids,Leo,november,november,6,november,30,"northern, southern","northern, southern"


In [18]:
# Lookup table from lowercase month name to month number (january -> 1 ... december -> 12).
# Numbering follows the position of each name in the ordered list below.
months = {
    month_name: month_number
    for month_number, month_name in enumerate(
        [
            'january', 'february', 'march', 'april', 'may', 'june',
            'july', 'august', 'september', 'october', 'november', 'december',
        ],
        start=1,
    )
}
months

{'january': 1,
 'february': 2,
 'march': 3,
 'april': 4,
 'may': 5,
 'june': 6,
 'july': 7,
 'august': 8,
 'september': 9,
 'october': 10,
 'november': 11,
 'december': 12}

In [19]:
# Replace the month names with their numbers (looked up in `months`) in each
# month column of the meteor shower table.
# NOTE: re-running this cell on already-converted columns maps the integers
# through `months` again and yields NaN; run it once per fresh load.
for month_col in ('bestmonth', 'startmonth', 'endmonth'):
    meteor_showers[month_col] = meteor_showers[month_col].map(months)

meteor_showers

Unnamed: 0,name,radiant,bestmonth,startmonth,startday,endmonth,endday,hemisphere,preferredhemisphere
0,Lyrids,Lyra,4,4,21,4,22,northern,northern
1,Eta Aquarids,Aquarius,5,4,19,5,28,"northern, southern",southern
2,Orionids,Orion,10,10,2,11,7,"northern, southern","northern, southern"
3,Perseids,Perseus,8,7,14,8,24,northern,northern
4,Leonids,Leo,11,11,6,11,30,"northern, southern","northern, southern"


In [20]:
meteor_showers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   name                 5 non-null      object
 1   radiant              5 non-null      object
 2   bestmonth            5 non-null      int64 
 3   startmonth           5 non-null      int64 
 4   startday             5 non-null      int64 
 5   endmonth             5 non-null      int64 
 6   endday               5 non-null      int64 
 7   hemisphere           5 non-null      object
 8   preferredhemisphere  5 non-null      object
dtypes: int64(5), object(4)
memory usage: 492.0+ bytes


In [21]:
# Apply the same month-name -> month-number lookup to the remaining tables:
# the constellations' best viewing month and the moon phase calendar's month.
constellations['bestmonth'] = constellations['bestmonth'].map(months)
moon_phases['month'] = moon_phases['month'].map(months)


In [22]:
moon_phases

Unnamed: 0,month,day,moonphase,specialevent
0,1,1,,
1,1,2,first quarter,
2,1,3,,
3,1,4,,
4,1,5,,
...,...,...,...,...
361,12,27,,
362,12,28,,
363,12,29,full moon,
364,12,30,,


In [23]:
moon_phases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   month         366 non-null    int64 
 1   day           366 non-null    int64 
 2   moonphase     50 non-null     object
 3   specialevent  10 non-null     object
dtypes: int64(2), object(2)
memory usage: 11.6+ KB


In [24]:
constellations

Unnamed: 0,constellation,bestmonth,latitudestart,latitudeend,besttime,hemisphere
0,Lyra,8,90,-40,21:00,northern
1,Aquarius,10,65,-90,21:00,southern
2,Orion,1,85,-75,21:00,northern
3,Perseus,12,90,-35,21:00,northern
4,Leo,4,90,65,21:00,northern


In [25]:
constellations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   constellation  5 non-null      object
 1   bestmonth      5 non-null      int64 
 2   latitudestart  5 non-null      int64 
 3   latitudeend    5 non-null      int64 
 4   besttime       5 non-null      object
 5   hemisphere     5 non-null      object
dtypes: int64(3), object(3)
memory usage: 372.0+ bytes


### Convert date information to Datetime (create startdate column)

In [26]:
# Add a 'startdate' datetime column to the meteor shower table.
# Day and month are packed, together with a fixed year of 2020, into a
# YYYYMMDD integer (day + 100 * month + 10000 * year), which is then parsed
# with an explicit format.
date = meteor_showers['startday'] + 100 * meteor_showers['startmonth'] + 10000 * 2020
meteor_showers['startdate'] = pd.to_datetime(date, format='%Y%m%d')

meteor_showers

Unnamed: 0,name,radiant,bestmonth,startmonth,startday,endmonth,endday,hemisphere,preferredhemisphere,startdate
0,Lyrids,Lyra,4,4,21,4,22,northern,northern,2020-04-21
1,Eta Aquarids,Aquarius,5,4,19,5,28,"northern, southern",southern,2020-04-19
2,Orionids,Orion,10,10,2,11,7,"northern, southern","northern, southern",2020-10-02
3,Perseids,Perseus,8,7,14,8,24,northern,northern,2020-07-14
4,Leonids,Leo,11,11,6,11,30,"northern, southern","northern, southern",2020-11-06


In [27]:
# Add an 'enddate' datetime column, built the same way as the start date:
# day + 100 * month + 10000 * 2020 gives a YYYYMMDD integer to parse.
date = meteor_showers['endday'] + 100 * meteor_showers['endmonth'] + 10000 * 2020
meteor_showers['enddate'] = pd.to_datetime(date, format='%Y%m%d')

meteor_showers

Unnamed: 0,name,radiant,bestmonth,startmonth,startday,endmonth,endday,hemisphere,preferredhemisphere,startdate,enddate
0,Lyrids,Lyra,4,4,21,4,22,northern,northern,2020-04-21,2020-04-22
1,Eta Aquarids,Aquarius,5,4,19,5,28,"northern, southern",southern,2020-04-19,2020-05-28
2,Orionids,Orion,10,10,2,11,7,"northern, southern","northern, southern",2020-10-02,2020-11-07
3,Perseids,Perseus,8,7,14,8,24,northern,northern,2020-07-14,2020-08-24
4,Leonids,Leo,11,11,6,11,30,"northern, southern","northern, southern",2020-11-06,2020-11-30


In [28]:
meteor_showers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   name                 5 non-null      object        
 1   radiant              5 non-null      object        
 2   bestmonth            5 non-null      int64         
 3   startmonth           5 non-null      int64         
 4   startday             5 non-null      int64         
 5   endmonth             5 non-null      int64         
 6   endday               5 non-null      int64         
 7   hemisphere           5 non-null      object        
 8   preferredhemisphere  5 non-null      object        
 9   startdate            5 non-null      datetime64[ns]
 10  enddate              5 non-null      datetime64[ns]
dtypes: datetime64[ns](2), int64(5), object(4)
memory usage: 572.0+ bytes


In [29]:
# Convert the month/day pair of every moon phase row to a datetime and store
# it in a new 'date' column (YYYYMMDD integer with the year fixed at 2020).
# NOTE(review): the table has 366 rows, so it presumably includes Feb 29,
# which only parses for a leap year such as 2020 -- confirm before changing the year.
date = moon_phases['day'] + 100 * moon_phases['month'] + 10000 * 2020
moon_phases['date'] = pd.to_datetime(date, format='%Y%m%d')


moon_phases

Unnamed: 0,month,day,moonphase,specialevent,date
0,1,1,,,2020-01-01
1,1,2,first quarter,,2020-01-02
2,1,3,,,2020-01-03
3,1,4,,,2020-01-04
4,1,5,,,2020-01-05
...,...,...,...,...,...
361,12,27,,,2020-12-27
362,12,28,,,2020-12-28
363,12,29,full moon,,2020-12-29
364,12,30,,,2020-12-30


In [30]:
moon_phases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   month         366 non-null    int64         
 1   day           366 non-null    int64         
 2   moonphase     50 non-null     object        
 3   specialevent  10 non-null     object        
 4   date          366 non-null    datetime64[ns]
dtypes: datetime64[ns](1), int64(2), object(2)
memory usage: 14.4+ KB


### Convert moon phase data to numbers

In [31]:
# Numeric value assigned to each named moon phase.
# Both quarter phases share the same value (0.5).
phases = dict([
    ('new moon', 0),
    ('first quarter', 0.5),
    ('third quarter', 0.5),
    ('full moon', 1),
])
phases

{'new moon': 0, 'first quarter': 0.5, 'third quarter': 0.5, 'full moon': 1}

In [32]:
# Translate each phase name into its numeric value from `phases` and store it
# in a new 'percentage' column. Rows without a phase name stay NaN here.
phase_names = moon_phases['moonphase']
moon_phases['percentage'] = phase_names.map(phases)

In [33]:
moon_phases

Unnamed: 0,month,day,moonphase,specialevent,date,percentage
0,1,1,,,2020-01-01,
1,1,2,first quarter,,2020-01-02,0.5
2,1,3,,,2020-01-03,
3,1,4,,,2020-01-04,
4,1,5,,,2020-01-05,
...,...,...,...,...,...,...
361,12,27,,,2020-12-27,
362,12,28,,,2020-12-28,
363,12,29,full moon,,2020-12-29,1.0
364,12,30,,,2020-12-30,


In [34]:
moon_phases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 6 columns):
 #   Column        Non-Null Count  Dtype         
---  ------        --------------  -----         
 0   month         366 non-null    int64         
 1   day           366 non-null    int64         
 2   moonphase     50 non-null     object        
 3   specialevent  10 non-null     object        
 4   date          366 non-null    datetime64[ns]
 5   percentage    50 non-null     float64       
dtypes: datetime64[ns](1), float64(1), int64(2), object(2)
memory usage: 17.3+ KB


### Delete unnecessary columns

In [35]:
meteor_showers.columns

Index(['name', 'radiant', 'bestmonth', 'startmonth', 'startday', 'endmonth',
       'endday', 'hemisphere', 'preferredhemisphere', 'startdate', 'enddate'],
      dtype='object')

In [36]:
moon_phases.columns

Index(['month', 'day', 'moonphase', 'specialevent', 'date', 'percentage'], dtype='object')

In [37]:
constellations.columns

Index(['constellation', 'bestmonth', 'latitudestart', 'latitudeend',
       'besttime', 'hemisphere'],
      dtype='object')

In [38]:
# Remove the columns this notebook treats as unnecessary from each table.
# The drops happen in place, so re-running this cell after the columns are
# gone raises a KeyError.
meteor_showers.drop(
    columns=['startmonth', 'startday', 'endmonth', 'endday', 'hemisphere'],
    inplace=True,
)

moon_phases.drop(columns=['month', 'day', 'moonphase', 'specialevent'], inplace=True)

constellations.drop(columns='besttime', inplace=True)

In [39]:
meteor_showers

Unnamed: 0,name,radiant,bestmonth,preferredhemisphere,startdate,enddate
0,Lyrids,Lyra,4,northern,2020-04-21,2020-04-22
1,Eta Aquarids,Aquarius,5,southern,2020-04-19,2020-05-28
2,Orionids,Orion,10,"northern, southern",2020-10-02,2020-11-07
3,Perseids,Perseus,8,northern,2020-07-14,2020-08-24
4,Leonids,Leo,11,"northern, southern",2020-11-06,2020-11-30


In [40]:
moon_phases

Unnamed: 0,date,percentage
0,2020-01-01,
1,2020-01-02,0.5
2,2020-01-03,
3,2020-01-04,
4,2020-01-05,
...,...,...
361,2020-12-27,
362,2020-12-28,
363,2020-12-29,1.0
364,2020-12-30,


In [41]:
constellations

Unnamed: 0,constellation,bestmonth,latitudestart,latitudeend,hemisphere
0,Lyra,8,90,-40,northern
1,Aquarius,10,65,-90,southern
2,Orion,1,85,-75,northern
3,Perseus,12,90,-35,northern
4,Leo,4,90,65,northern


### Handle missing values for moon phases

In [42]:
moon_phases.head()

Unnamed: 0,date,percentage
0,2020-01-01,
1,2020-01-02,0.5
2,2020-01-03,
3,2020-01-04,
4,2020-01-05,


In [44]:
# Handle missing values for moon phases.
# Most rows have no phase value of their own, so each one inherits the most
# recent known value above it (forward fill). Rows that come before the first
# known value have nothing to inherit and are set to 0, the same starting
# value the previous row-by-row loop used.
# This replaces an iterrows() loop that wrote back into the frame while
# iterating over it; the vectorized form produces the same column.
moon_phases['percentage'] = moon_phases['percentage'].ffill().fillna(0)

# Sanity check: no gaps may remain after the fill.
assert moon_phases['percentage'].notna().all()
   

In [45]:
moon_phases

Unnamed: 0,date,percentage
0,2020-01-01,0.0
1,2020-01-02,0.5
2,2020-01-03,0.5
3,2020-01-04,0.5
4,2020-01-05,0.5
...,...,...
361,2020-12-27,0.5
362,2020-12-28,0.5
363,2020-12-29,1.0
364,2020-12-30,1.0


In [46]:
moon_phases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        366 non-null    datetime64[ns]
 1   percentage  366 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.8 KB
