# Exercises - Data Acquistion

The end result of this exercise should be a file named acquire.py.

1. Using the code from the lesson as a guide and the REST API from https://swapi.dev/ as we did in the lesson, create a dataframe named people that has all of the data for people.

In [107]:
import requests
import pandas as pd
import math

In [108]:
url = 'https://swapi.dev/api/people/'

In [109]:
response = requests.get(url)

In [110]:
data = response.json()

In [111]:
people_df = pd.DataFrame(data['results'])

In [112]:
while data['next'] != None:
    print(data['next'])
    response = requests.get(data['next'])
    data = response.json()
    people_df = pd.concat([people_df, pd.DataFrame(data['results'])], ignore_index = None)

https://swapi.dev/api/people/?page=2
https://swapi.dev/api/people/?page=3
https://swapi.dev/api/people/?page=4
https://swapi.dev/api/people/?page=5
https://swapi.dev/api/people/?page=6
https://swapi.dev/api/people/?page=7
https://swapi.dev/api/people/?page=8
https://swapi.dev/api/people/?page=9


In [113]:
people_df.shape

(82, 16)

2. Do the same thing, but for planets.

In [118]:
url = 'https://swapi.dev/api/planets/'

In [119]:
response = requests.get(url)

In [120]:
data = response.json()

In [121]:
planets_df = pd.DataFrame(data['results'])

In [122]:
while data['next'] != None:
    print(data['next'])
    response = requests.get(data['next'])
    data = response.json()
    planets_df = pd.concat([planets_df, pd.DataFrame(data['results'])], ignore_index = None)

https://swapi.dev/api/planets/?page=2
https://swapi.dev/api/planets/?page=3
https://swapi.dev/api/planets/?page=4
https://swapi.dev/api/planets/?page=5
https://swapi.dev/api/planets/?page=6


In [123]:
planets_df.shape

(60, 14)

3. Extract the data for starships.

In [124]:
url = 'https://swapi.dev/api/starships/'

In [125]:
response = requests.get(url)

In [126]:
data = response.json()

In [128]:
starships_df = pd.DataFrame(data['results'])

In [129]:
while data['next'] != None:
    print(data['next'])
    response = requests.get(data['next'])
    data = response.json()
    starships_df = pd.concat([starships_df, pd.DataFrame(data['results'])], ignore_index = None)

https://swapi.dev/api/starships/?page=2
https://swapi.dev/api/starships/?page=3
https://swapi.dev/api/starships/?page=4


In [130]:
starships_df.shape

(36, 18)

4. Save the data in your files to local csv files so that it will be faster to access in the future.

In [132]:
import os
import pandas as pd

def get_star_wars_data():
    '''
    This function reads the star wars data from 
    'https://swapi.dev/api/ site into a df.
    '''

    url = 'https://swapi.dev/api/people/'
    response = requests.get(url)
    data = response.json()
    people_df = pd.DataFrame(data['results'])
    while data['next'] != None:
        print(data['next'])
        response = requests.get(data['next'])
        data = response.json()
        people_df = pd.concat([people_df, pd.DataFrame(data['results'])], ignore_index = None)
    
    
    url = 'https://swapi.dev/api/planets/'
    response = requests.get(url)
    data = response.json()
    planets_df = pd.DataFrame(data['results'])
    while data['next'] != None:
        print(data['next'])
        response = requests.get(data['next'])
        data = response.json()
        planets_df = pd.concat([planets_df, pd.DataFrame(data['results'])], ignore_index = None)
    
    url = 'https://swapi.dev/api/starships/'
    response = requests.get(url)
    data = response.json()
    starships_df = pd.DataFrame(data['results'])
    while data['next'] != None:
        print(data['next'])
        response = requests.get(data['next'])
        data = response.json()
        starships_df = pd.concat([starships_df, pd.DataFrame(data['results'])], ignore_index = None)

    people_planets_df = people_df.append(planets_df, ignore_index=True)
    people_planets_starships_df = people_planets_df.append(starships_df, ignore_index=True)
    
    return people_planets_starships_df


def acquire_star_wars():
    '''
    This function reads in star wars data from the 
    https://swapi.dev/api/ site, writes data to
    a csv file if a local file does not exist, and returns a df.
    '''
    
    if os.path.isfile('star_wars.csv'):
        
        # If csv file exists, read in data from csv file.
        people_planets_starships_df = pd.read_csv('star_wars.csv', index_col=0)
        
    else:

        #creates new csv if one does not already exist
        people_planets_starships_df = get_star_wars_data()
        people_planets_starships_df.to_csv('star_wars.csv')

    return people_planets_starships_df


In [133]:
starwars_df = acquire_star_wars()


https://swapi.dev/api/people/?page=2
https://swapi.dev/api/people/?page=3
https://swapi.dev/api/people/?page=4
https://swapi.dev/api/people/?page=5
https://swapi.dev/api/people/?page=6
https://swapi.dev/api/people/?page=7
https://swapi.dev/api/people/?page=8
https://swapi.dev/api/people/?page=9
https://swapi.dev/api/planets/?page=2
https://swapi.dev/api/planets/?page=3
https://swapi.dev/api/planets/?page=4
https://swapi.dev/api/planets/?page=5
https://swapi.dev/api/planets/?page=6
https://swapi.dev/api/starships/?page=2
https://swapi.dev/api/starships/?page=3
https://swapi.dev/api/starships/?page=4


  people_planets_df = people_df.append(planets_df, ignore_index=True)
  people_planets_starships_df = people_planets_df.append(starships_df, ignore_index=True)


5. Combine the data from your three separate dataframes into one large dataframe.

In [134]:
starwars_df = get_star_wars_data()
starwars_df.head()

https://swapi.dev/api/people/?page=2
https://swapi.dev/api/people/?page=3
https://swapi.dev/api/people/?page=4
https://swapi.dev/api/people/?page=5
https://swapi.dev/api/people/?page=6
https://swapi.dev/api/people/?page=7
https://swapi.dev/api/people/?page=8
https://swapi.dev/api/people/?page=9
https://swapi.dev/api/planets/?page=2
https://swapi.dev/api/planets/?page=3
https://swapi.dev/api/planets/?page=4
https://swapi.dev/api/planets/?page=5
https://swapi.dev/api/planets/?page=6
https://swapi.dev/api/starships/?page=2
https://swapi.dev/api/starships/?page=3
https://swapi.dev/api/starships/?page=4


  people_planets_df = people_df.append(planets_df, ignore_index=True)
  people_planets_starships_df = people_planets_df.append(starships_df, ignore_index=True)


Unnamed: 0,name,height,mass,hair_color,skin_color,eye_color,birth_year,gender,homeworld,films,...,length,max_atmosphering_speed,crew,passengers,cargo_capacity,consumables,hyperdrive_rating,MGLT,starship_class,pilots
0,Luke Skywalker,172,77,blond,fair,blue,19BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",...,,,,,,,,,,
1,C-3PO,167,75,,gold,yellow,112BBY,,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",...,,,,,,,,,,
2,R2-D2,96,32,,"white, blue",red,33BBY,,https://swapi.dev/api/planets/8/,"[https://swapi.dev/api/films/1/, https://swapi...",...,,,,,,,,,,
3,Darth Vader,202,136,none,white,yellow,41.9BBY,male,https://swapi.dev/api/planets/1/,"[https://swapi.dev/api/films/1/, https://swapi...",...,,,,,,,,,,
4,Leia Organa,150,49,brown,light,brown,19BBY,female,https://swapi.dev/api/planets/2/,"[https://swapi.dev/api/films/1/, https://swapi...",...,,,,,,,,,,


In [135]:
starwars_df.shape

(178, 38)

6. Acquire the Open Power Systems Data for Germany, which has been rapidly expanding its renewable energy production in recent years. The data set includes country-wide totals of electricity consumption, wind power production, and solar power production for 2006-2017. You can get the data here: https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv

In [23]:
open_power_df = pd.read_csv('https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv')
open_power_df.head()

Unnamed: 0,Date,Consumption,Wind,Solar,Wind+Solar
0,2006-01-01,1069.184,,,
1,2006-01-02,1380.521,,,
2,2006-01-03,1442.533,,,
3,2006-01-04,1457.217,,,
4,2006-01-05,1477.131,,,


7. Make sure all the work that you have done above is reproducible. That is, you should put the code above into separate functions in the acquire.py file and be able to re-run the functions and get the same data.

In [24]:
def get_open_power_data():
    '''
    This function reads the open power data from 
    'https://raw.githubusercontent.com site into a df.
    '''
    
    open_power_df = pd.read_csv('https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv')
    
    return open_power_df


def acquire_open_power():
    '''
    This function reads in open power data from the 
    https://raw.githubusercontent.com/jenfly/opsd/master/opsd_germany_daily.csv 
    site, writes data to a csv file if a local file does not exist, and returns a df.
    '''
    if os.path.isfile('open_power.csv'):
        
        # If csv file exists, read in data from csv file.
        open_power = pd.read_csv('open_power.csv', index_col=0)
        
    else:

        #creates new csv if one does not already exist
        open_power = get_open_power_data()
        open_power.to_csv('open_power.csv')

    return open_power

In [25]:
open_power = acquire_open_power()

In [26]:
open_power_df = get_open_power_data()
open_power_df.head()

Unnamed: 0,Date,Consumption,Wind,Solar,Wind+Solar
0,2006-01-01,1069.184,,,
1,2006-01-02,1380.521,,,
2,2006-01-03,1442.533,,,
3,2006-01-04,1457.217,,,
4,2006-01-05,1477.131,,,
