In [None]:
# install the third-party packages into the kernel's own environment
# (names are space-separated; json ships with Python and is not installed via pip)
%pip install pandas requests matplotlib

In [None]:
import requests, json
import pandas as pd
import matplotlib.pyplot as plt

In [6]:
# specify the required format of the data requested from the api
# - "Accept" tells the server which response format is wanted.
# - HTTP header names are case-insensitive, so only one Content-Type entry is kept
#   (two keys differing only by case collapse into a single header when sent).
headers = {
    "Accept": "application/json",
    "Content-Type": "application/json;charset=UTF-8",
}

In [7]:
# make the request; the timeout keeps a stalled connection from hanging the kernel
request = requests.get(
    "https://global-warming.org/api/co2-api", headers=headers, timeout=30
)
# fail here on an HTTP error status instead of later while parsing the body
request.raise_for_status()

In [8]:
# parse the response body as JSON and assign the result to a variable called 'data'
data = request.json()

In [9]:
# confirm the data has the expected type by printing the class of the parsed payload
print(type(data))

<class 'dict'>


In [16]:
# build a dataframe from the parsed JSON: each top-level key becomes a column
unformatted_df = pd.DataFrame.from_dict(
    data=data,
    orient="columns",
)

In [17]:
# verify the output: print the frame to inspect its columns and row count
print(unformatted_df)

                                                    co2
0     {'year': '2011', 'month': '1', 'day': '1', 'cy...
1     {'year': '2011', 'month': '1', 'day': '2', 'cy...
2     {'year': '2011', 'month': '1', 'day': '3', 'cy...
3     {'year': '2011', 'month': '1', 'day': '4', 'cy...
4     {'year': '2011', 'month': '1', 'day': '5', 'cy...
...                                                 ...
3743  {'year': '2021', 'month': '4', 'day': '1', 'cy...
3744  {'year': '2021', 'month': '4', 'day': '2', 'cy...
3745  {'year': '2021', 'month': '4', 'day': '3', 'cy...
3746  {'year': '2021', 'month': '4', 'day': '4', 'cy...
3747  {'year': '2021', 'month': '4', 'day': '5', 'cy...

[3748 rows x 1 columns]


# Begin data cleaning
"""
Currently the dataframe exists in the following format:

                                                    co2
0     {'year': '2011', 'month': '1', 'day': '1', 'cy...
1     {'year': '2011', 'month': '1', 'day': '2', 'cy...
2     {'year': '2011', 'month': '1', 'day': '3', 'cy...
3     {'year': '2011', 'month': '1', 'day': '4', 'cy...
4     {'year': '2011', 'month': '1', 'day': '5', 'cy...

This is not optimal; I want to rearrange the data so that the keys of each dictionary/JSON
object become dataframe columns holding their associated values.
"""

In [18]:
# rearrange the data: expand each dictionary in the 'co2' column into one row,
# so that the dictionary keys become the dataframe columns.
# The input is `unformatted_df`; `df` does not exist until this line has run,
# so reading from `df` here would raise NameError on a fresh kernel.
df = pd.DataFrame(unformatted_df['co2'].values.tolist())

In [19]:
# inspect the rearranged dataframe
print(df)

      year month day   cycle   trend
0     2011     1   1  391.25  389.75
1     2011     1   2  391.29  389.76
2     2011     1   3  391.32  389.77
3     2011     1   4  391.36  389.77
4     2011     1   5  391.39  389.78
...    ...   ...  ..     ...     ...
3743  2021     4   1  416.28  414.44
3744  2021     4   2  416.29  414.45
3745  2021     4   3  416.30  414.46
3746  2021     4   4  416.32  414.46
3747  2021     4   5  416.33  414.47

[3748 rows x 5 columns]


In [37]:
# Check the dataframe for any null or duplicate values

# rows with missing data: keep only the rows where at least one column is null
# (indexing with the full boolean frame, df[df.isnull()], would instead return
# every row with its non-null values masked to NaN)
rows_null_data = df[df.isnull().any(axis=1)]

# the number of null values in the 'trend' column
sum_rows_null_data = df['trend'].isnull().sum()

# rows that are exact duplicates of an earlier row
duplicate_rows = df[df.duplicated()]

# check for NaN under single column
nan_column = df['trend'].isnull().values.any()
print(nan_column)

False


# Data cleaning example
"""
The global warming dataset I have chosen has already been cleaned and appears to be in good condition.
To fulfil the milestone requirements of the project, I will demonstrate some data cleansing
best practices with a dataset that is better suited to that purpose:

Dataset: Iris Species

Classify iris plants into three species in this classic dataset

https://www.kaggle.com/uciml/iris
"""

In [39]:
# The iris data comes from a CSV file stored in the data folder of the project repo.
# Load the file; the path is relative to the notebook's working directory.
iris_species = pd.read_csv(filepath_or_buffer="data/iris.csv")
# NOTE(review): read_csv already returns a DataFrame, so this wrapper looks redundant;
# kept because both names may be used further down.
iris_species_dataframe = pd.DataFrame(data=iris_species)

In [40]:
# inspect the imported iris dataframe
print(iris_species_dataframe)

      Id  SepalLengthCm  SepalWidthCm  PetalLengthCm  PetalWidthCm  \
0      1            5.1           3.5            1.4           0.2   
1      2            4.9           3.0            1.4           0.2   
2      3            4.7           3.2            1.3           0.2   
3      4            4.6           3.1            1.5           0.2   
4      5            5.0           3.6            1.4           0.2   
..   ...            ...           ...            ...           ...   
145  146            6.7           3.0            5.2           2.3   
146  147            6.3           2.5            5.0           1.9   
147  148            6.5           3.0            5.2           2.0   
148  149            6.2           3.4            5.4           2.3   
149  150            5.9           3.0            5.1           1.8   

            Species  
0       Iris-setosa  
1       Iris-setosa  
2       Iris-setosa  
3       Iris-setosa  
4       Iris-setosa  
..              ...  
145  