In [37]:
import numpy as np
import pandas as pd

# Read the four source tables; the paths are relative to the current working directory.
csv_paths = (
    'data/meteorshowers.csv',
    'data/moonphases.csv',
    'data/constellations.csv',
    'data/cities.csv',
)
meteor_showers, moon_phases, constellations, cities = (pd.read_csv(path) for path in csv_paths)

In [38]:
# Preview the first rows of the meteor shower table (head() shows five by default).
meteor_showers.head()

Unnamed: 0,name,radiant,bestmonth,startmonth,startday,endmonth,endday,hemisphere,preferredhemisphere
0,Lyrids,Lyra,april,april,21,april,22,northern,northern
1,Eta Aquarids,Aquarius,may,april,19,may,28,"northern, southern",southern
2,Orionids,Orion,october,october,2,november,7,"northern, southern","northern, southern"
3,Perseids,Perseus,august,july,14,august,24,northern,northern
4,Leonids,Leo,november,november,6,november,30,"northern, southern","northern, southern"


In [39]:
# List each column's non-null count and dtype for the meteor shower table.
meteor_showers.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   name                 5 non-null      object
 1   radiant              5 non-null      object
 2   bestmonth            5 non-null      object
 3   startmonth           5 non-null      object
 4   startday             5 non-null      int64 
 5   endmonth             5 non-null      object
 6   endday               5 non-null      int64 
 7   hemisphere           5 non-null      object
 8   preferredhemisphere  5 non-null      object
dtypes: int64(2), object(7)
memory usage: 488.0+ bytes


In [40]:
# Preview the first rows of the Moon phase table.
moon_phases.head()

Unnamed: 0,month,day,moonphase,specialevent
0,january,1,,
1,january,2,first quarter,
2,january,3,,
3,january,4,,
4,january,5,,


In [41]:
# List each column's non-null count and dtype; the counts reveal which columns have gaps.
moon_phases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 4 columns):
 #   Column        Non-Null Count  Dtype 
---  ------        --------------  ----- 
 0   month         366 non-null    object
 1   day           366 non-null    int64 
 2   moonphase     50 non-null     object
 3   specialevent  10 non-null     object
dtypes: int64(1), object(3)
memory usage: 11.6+ KB


In [42]:
# Preview the first rows of the constellation table.
constellations.head()

Unnamed: 0,constellation,bestmonth,latitudestart,latitudeend,besttime,hemisphere
0,Lyra,august,90,-40,21:00,northern
1,Aquarius,october,65,-90,21:00,southern
2,Orion,january,85,-75,21:00,northern
3,Perseus,december,90,-35,21:00,northern
4,Leo,april,90,65,21:00,northern


In [43]:
# List each column's non-null count and dtype for the constellation table.
constellations.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 6 columns):
 #   Column         Non-Null Count  Dtype 
---  ------         --------------  ----- 
 0   constellation  5 non-null      object
 1   bestmonth      5 non-null      object
 2   latitudestart  5 non-null      int64 
 3   latitudeend    5 non-null      int64 
 4   besttime       5 non-null      object
 5   hemisphere     5 non-null      object
dtypes: int64(2), object(4)
memory usage: 368.0+ bytes


In [44]:
# Preview the first rows of the city table.
cities.head()

Unnamed: 0,city,latitude,country
0,Abu Dhabi,24.47,United Arab Emirates
1,Abuja,9.07,Nigeria
2,Accra,5.55,Ghana
3,Adamstown,-25.07,Pitcairn Islands
4,Addis Ababa,9.02,Ethiopia


In [45]:
# List each column's non-null count and dtype for the city table.
cities.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 256 entries, 0 to 255
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   city      256 non-null    object 
 1   latitude  256 non-null    float64
 2   country   256 non-null    object 
dtypes: float64(1), object(2)
memory usage: 6.1+ KB


In [46]:
# Convert to numbers

# We can see from the calls to head() that a lot of information is written in words (strings) instead of numbers (integers). Some data makes sense as strings, like city names or meteor shower names. But other data makes more sense as integers, like months or Moon phases.

#You can quickly convert the month columns to numbers:
#   Create a map of months to numbers. We can see from the output of head() that the months are all lowercase.
months = {
    'january': 1, 'february': 2, 'march': 3, 'april': 4,
    'may': 5, 'june': 6, 'july': 7, 'august': 8,
    'september': 9, 'october': 10, 'november': 11, 'december': 12,
}

#   Replace the month names with their numbers in every month column of each data frame.
#   Run this once per fresh load: values that are already numbers are not keys of the
#   map, so mapping them a second time turns them into NaN.
month_columns_by_frame = (
    (meteor_showers, ('bestmonth', 'startmonth', 'endmonth')),
    (moon_phases, ('month',)),
    (constellations, ('bestmonth',)),
)
for frame, month_columns in month_columns_by_frame:
    for column in month_columns:
        #   Assigning back to the same column saves the result to the data frame.
        frame[column] = frame[column].map(months)


In [47]:
# Check that the month columns now show numbers instead of month names.
meteor_showers.head()


Unnamed: 0,name,radiant,bestmonth,startmonth,startday,endmonth,endday,hemisphere,preferredhemisphere
0,Lyrids,Lyra,4,4,21,4,22,northern,northern
1,Eta Aquarids,Aquarius,5,4,19,5,28,"northern, southern",southern
2,Orionids,Orion,10,10,2,11,7,"northern, southern","northern, southern"
3,Perseids,Perseus,8,7,14,8,24,northern,northern
4,Leonids,Leo,11,11,6,11,30,"northern, southern","northern, southern"


In [48]:
# Check the dtypes of the month columns after the mapping.
meteor_showers.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 9 columns):
 #   Column               Non-Null Count  Dtype 
---  ------               --------------  ----- 
 0   name                 5 non-null      object
 1   radiant              5 non-null      object
 2   bestmonth            5 non-null      int64 
 3   startmonth           5 non-null      int64 
 4   startday             5 non-null      int64 
 5   endmonth             5 non-null      int64 
 6   endday               5 non-null      int64 
 7   hemisphere           5 non-null      object
 8   preferredhemisphere  5 non-null      object
dtypes: int64(5), object(4)
memory usage: 488.0+ bytes


In [49]:
# Before you continue, convert months and days in the meteor_showers data frame to a type called datetime, which tracks dates.

#Create two new columns: startdate and enddate. These columns will contain a month and day in 2020
# Encode each date as the integer YYYYMMDD (year fixed at 2020), then let pandas parse it.
year_offset = 2020 * 10000
start_as_int = year_offset + meteor_showers['startmonth'] * 100 + meteor_showers['startday']
end_as_int = year_offset + meteor_showers['endmonth'] * 100 + meteor_showers['endday']
meteor_showers['startdate'] = pd.to_datetime(start_as_int, format='%Y%m%d')
meteor_showers['enddate'] = pd.to_datetime(end_as_int, format='%Y%m%d')


In [50]:
#Follow the same pattern for moon_phases:
# Build the integer YYYYMMDD for every row (year fixed at 2020), then parse it into a datetime.
moon_date_as_int = 2020 * 10000 + moon_phases['month'] * 100 + moon_phases['day']
moon_phases['date'] = pd.to_datetime(moon_date_as_int, format='%Y%m%d')

In [51]:
#Next, convert hemisphere data to numbers by using the mapping process:
hemispheres = {
    'northern': 0,
    'southern': 1,
    'northern, southern': 3,
}
# NOTE(review): the codes jump from 1 to 3 - confirm whether 2 was intended for "both".
# Run once per fresh load: already-mapped numbers are not keys of the map and would become NaN.
for hemisphere_frame in (meteor_showers, constellations):
    hemisphere_frame['hemisphere'] = hemisphere_frame['hemisphere'].map(hemispheres)


In [52]:
# Finally, convert Moon phases to numbers that represent the percentage of the Moon that's visible. This time, add a new column to represent the data:

#Create the map of phases to numbers.
phases = {
    'new moon': 0,
    'third quarter': 0.5,
    'first quarter': 0.5,
    'full moon': 1.0,
}

# Store the visible fraction of the Moon (0 to 1.0) in a new column called percentage.
# Rows whose moonphase is empty have no key in the map, so they stay NaN for now.
moon_phases['percentage'] = moon_phases['moonphase'].map(phases)

# Display the first five rows to check the new column.
moon_phases.head()

Unnamed: 0,month,day,moonphase,specialevent,date,percentage
0,1,1,,,2020-01-01,
1,1,2,first quarter,,2020-01-02,0.5
2,1,3,,,2020-01-03,
3,1,4,,,2020-01-04,
4,1,5,,,2020-01-05,


In [53]:
# Now you've converted all the data that makes more sense as numbers. But some values are missing.


In [54]:
#Some of the data from these .csv files isn't useful. You can delete the following data:

#REMOVE UNNECESSARY DATA
#Data frame : #meteor_showers     
#Columns to remove : startmonth, startday, endmonth, endday, hemisphere
#Reason :The month and day information is captured in the startdate and enddate columns. The preferredhemisphere column is the optimal value.

#Data frame : moon_phases
#Columns to remove : month, day, moonphase, specialevent
#Reason : Month and day are captured by date. The Moon phase is captured by percentage. The specialevent column isn't relevant

#Data frame : constellations
#Columns to remove : besttime
#Reason : Every row is 21:00

# Here's how to remove those columns
# drop() returns a new frame, so each name is rebound to the trimmed copy.
# Run once per fresh load: dropping a column that is already gone raises KeyError.
meteor_showers = meteor_showers.drop(
    columns=['startmonth', 'startday', 'endmonth', 'endday', 'hemisphere']
)
moon_phases = moon_phases.drop(
    columns=['month', 'day', 'moonphase', 'specialevent']
)
constellations = constellations.drop(columns=['besttime'])


In [55]:
# Now it's time to fill in the missing data.
# One of the .csv files is particularly interesting: the output of moon_phases.info() shows
# how many of the 366 rows have a value in the percentage column and how many are still empty.
moon_phases.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        366 non-null    datetime64[ns]
 1   percentage  50 non-null     float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.8 KB


In [56]:
# You see that the cycle of the Moon phases goes from 0 to 0.5 to 1 to 0.5 and then back to 0. So you could conceivably make every value between 0 and 0.5 be 0.25. And you could make every value between 0.5 and 1 be 0.75.

# You could get more detailed by figuring out a more accurate percentage on your own.
# Here a simpler fill is used: every day without a recorded phase takes the value of the
# most recent recorded phase before it.
#   ffill() carries the last non-null percentage forward over the nulls that follow it.
#   fillna(0) covers any rows before the first recorded phase, which have nothing to carry forward.
#   The cell is safe to re-run: once no nulls remain, both calls leave the column unchanged.
moon_phases['percentage'] = moon_phases['percentage'].ffill().fillna(0)

# Show the info for the moon_phases data frame to confirm that no nulls remain.
moon_phases.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 366 entries, 0 to 365
Data columns (total 2 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        366 non-null    datetime64[ns]
 1   percentage  366 non-null    float64       
dtypes: datetime64[ns](1), float64(1)
memory usage: 5.8 KB


In [57]:
# Now your data is cleansed and ready to be analyzed!

In [58]:
#function to get the latitude of the city
def predict_best_meteor_shower_viewing(city):
    """Return the latitude stored for ``city`` in the ``cities`` data frame.

    The name is compared exactly against the ``city`` column. When several rows
    share the name, the first one is used. A name with no matching row raises
    ``IndexError`` because there is no first element to take.
    """
    is_requested_city = cities['city'] == city
    matching_latitudes = cities.loc[is_requested_city, 'latitude']
    return matching_latitudes.iloc[0]

In [59]:
# How to use it: look up one city's latitude and print it.
print(predict_best_meteor_shower_viewing('Abu Dhabi'))

24.47


In [60]:
def predict_best_meteor_shower_viewing(city):
    """Describe the meteor showers viewable from ``city`` and the best date for each.

    Reads the notebook-level data frames ``cities``, ``constellations``,
    ``meteor_showers`` and ``moon_phases``.

    Parameters
    ----------
    city : str
        City name, compared exactly against the ``city`` column of ``cities``.
        When several rows share the name, the first one is used.

    Returns
    -------
    str
        One of:
        * a notice that the city isn't available, when it is not in ``cities``;
        * a notice that no meteor showers are viewable from the city;
        * a header line followed by one line per viewable meteor shower, naming
          the constellation to look towards and the first date in the shower's
          window on which the Moon percentage is lowest.
    """
    # Check the city column only. Searching every cell of the frame (cities.values)
    # would accept a country name, and the latitude lookup below would then fail.
    # The .values is needed: `in` on a Series tests the index labels, not the data.
    if city not in cities['city'].values:
        return "Unfortunately, " + city + " isn't available for a prediction at this time."

    # Get the latitude of the city from the cities data frame
    latitude = cities.loc[cities['city'] == city, 'latitude'].iloc[0]

    # A constellation is viewable when the latitude lies between its latitudeend
    # and latitudestart values, both bounds included.
    is_viewable = (constellations['latitudestart'] >= latitude) & (constellations['latitudeend'] <= latitude)
    constellation_list = constellations.loc[is_viewable, 'constellation'].tolist()

    no_showers_message = "Unfortunately, there are no meteor showers viewable from "+ city + "."

    shower_lines = []
    for constellation in constellation_list:
        # Filter once per constellation and reuse the row for name and dates.
        showers_from_radiant = meteor_showers.loc[meteor_showers['radiant'] == constellation]
        if showers_from_radiant.empty:
            # No meteor shower radiates from this constellation; nothing to report.
            continue
        shower = showers_from_radiant.iloc[0]

        # Moon phases for each date within the viewable time frame of that meteor shower
        in_window = (moon_phases['date'] >= shower['startdate']) & (moon_phases['date'] <= shower['enddate'])
        moon_phases_list = moon_phases.loc[in_window]
        if moon_phases_list.empty:
            # No Moon data falls inside the window, so no date can be recommended.
            continue

        # idxmin() returns the first row with the lowest percentage, i.e. the
        # earliest date on which the Moon is least visible.
        best_moon_date = moon_phases_list.loc[moon_phases_list['percentage'].idxmin(), 'date']

        shower_lines.append(
            shower['name'] + " is best seen if you look towards the " + constellation
            + " constellation on " + best_moon_date.strftime("%B %d, %Y") + ".\n"
        )

    # If nothing is viewable, let the user know
    if not shower_lines:
        return no_showers_message

    return "In " + city + " you can see the following meteor showers:\n" + "".join(shower_lines)




In [61]:
# Full prediction for Abu Dhabi; print() renders the newlines in the returned message.
print(predict_best_meteor_shower_viewing('Abu Dhabi'))

In Abu Dhabi you can see the following meteor showers:
Lyrids is best seen if you look towards the Lyra constellation on April 22, 2020.
Eta Aquarids is best seen if you look towards the Aquarius constellation on April 22, 2020.
Orionids is best seen if you look towards the Orion constellation on October 16, 2020.
Perseids is best seen if you look towards the Perseus constellation on July 20, 2020.



In [62]:
# Same prediction for a second city.
print(predict_best_meteor_shower_viewing('Paris'))

In Paris you can see the following meteor showers:
Lyrids is best seen if you look towards the Lyra constellation on April 22, 2020.
Eta Aquarids is best seen if you look towards the Aquarius constellation on April 22, 2020.
Orionids is best seen if you look towards the Orion constellation on October 16, 2020.
Perseids is best seen if you look towards the Perseus constellation on July 20, 2020.



In [63]:
# Show the first 100 rows of the city table (the display abbreviates the middle rows).
cities.head(100)

Unnamed: 0,city,latitude,country
0,Abu Dhabi,24.47,United Arab Emirates
1,Abuja,9.07,Nigeria
2,Accra,5.55,Ghana
3,Adamstown,-25.07,Pitcairn Islands
4,Addis Ababa,9.02,Ethiopia
...,...,...,...
95,Kathmandu,27.70,Nepal
96,Khartoum,15.63,Sudan
97,Kigali,-1.93,Rwanda
98,King Edward Point,-54.27,South Georgia and the South Sandwich Islands


In [64]:
# Prediction for another city name taken from the cities table.
print(predict_best_meteor_shower_viewing('Kingston'))

In Kingston you can see the following meteor showers:
Lyrids is best seen if you look towards the Lyra constellation on April 22, 2020.
Eta Aquarids is best seen if you look towards the Aquarius constellation on April 22, 2020.
Orionids is best seen if you look towards the Orion constellation on October 16, 2020.
Perseids is best seen if you look towards the Perseus constellation on July 20, 2020.



In [65]:
# Prediction for a far-southern location (latitude -54.27 in the cities table).
print(predict_best_meteor_shower_viewing('King Edward Point'))

In King Edward Point you can see the following meteor showers:
Eta Aquarids is best seen if you look towards the Aquarius constellation on April 22, 2020.
Orionids is best seen if you look towards the Orion constellation on October 16, 2020.

