In [None]:
# Step 1: Set Up the Environment
# Install required libraries (requests and pandas)
!pip install requests pandas -q

In [None]:
# Step 2: Ingest Data from a Public REST API
# Example: Fetch random user data from the Random User Generator API.
import requests
import pandas as pd

api_url = "https://randomuser.me/api/?results=10"
# A timeout keeps the cell from hanging indefinitely if the server stalls.
response = requests.get(api_url, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page as JSON.
response.raise_for_status()
data = response.json()

# Convert the results to a DataFrame; nested fields are flattened into
# dotted column names (e.g. name.first, location.city).
df_api = pd.json_normalize(data['results'])
print("Data from REST API:")
print(df_api.head())

Data from REST API:
   gender                         email           phone            cell nat  \
0    male      babur.pekkan@example.com  (882)-054-6907  (402)-236-5742  TR   
1    male  anthony.coenraad@example.com   (068) 4530429   (06) 67742560  NL   
2  female      freja.larsen@example.com        55934787        60098980  DK   
3    male      joaquin.soto@example.com     936-834-352     695-212-221  ES   
4  female   consuelo.ibanez@example.com     937-945-003     654-709-071  ES   

  name.title name.first name.last  location.street.number  \
0         Mr      Babür    Pekkan                     709   
1         Mr    Anthony  Coenraad                    2128   
2        Mrs      Freja    Larsen                     311   
3         Mr    Joaquin      Soto                    2117   
4       Miss   Consuelo    Ibáñez                    5157   

  location.street.name  ...  \
0          Filistin Cd  ...   
1             Klarinet  ...   
2             Langgade  ...   
3        Calle

In [None]:
# Step 3: Ingest Data from a CSV File
# The Iris dataset is read straight from a raw GitHub URL; pandas downloads
# and parses it in a single call.
csv_url = "https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv"
df_csv = pd.read_csv(filepath_or_buffer=csv_url)

# Heading and preview are emitted by one print call, separated by a newline.
print("\nData from CSV File:", df_csv.head(), sep="\n")



Data from CSV File:
   sepal_length  sepal_width  petal_length  petal_width species
0           5.1          3.5           1.4          0.2  setosa
1           4.9          3.0           1.4          0.2  setosa
2           4.7          3.2           1.3          0.2  setosa
3           4.6          3.1           1.5          0.2  setosa
4           5.0          3.6           1.4          0.2  setosa


In [None]:
# Step 4: Inspect and Compare Data
# Perform basic inspection on both data sources.
# Inspect columns and info for both DataFrames
print("API Data Columns:", df_api.columns)
print("CSV Data Columns:", df_csv.columns)

# DataFrame.info() writes its report to stdout and returns None, so it is
# called directly; wrapping it in print() would append a stray "None" line.
print("\nAPI Data Info:")
df_api.info()

print("\nCSV Data Info:")
df_csv.info()

API Data Columns: Index(['gender', 'email', 'phone', 'cell', 'nat', 'name.title', 'name.first',
       'name.last', 'location.street.number', 'location.street.name',
       'location.city', 'location.state', 'location.country',
       'location.postcode', 'location.coordinates.latitude',
       'location.coordinates.longitude', 'location.timezone.offset',
       'location.timezone.description', 'login.uuid', 'login.username',
       'login.password', 'login.salt', 'login.md5', 'login.sha1',
       'login.sha256', 'dob.date', 'dob.age', 'registered.date',
       'registered.age', 'id.name', 'id.value', 'picture.large',
       'picture.medium', 'picture.thumbnail'],
      dtype='object')
CSV Data Columns: Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
       'species'],
      dtype='object')

API Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10 entries, 0 to 9
Data columns (total 34 columns):
 #   Column                          Non-Null Count  Dtype 

In [None]:
# Persist both DataFrames as CSV files (paths are relative to the current
# working directory). index=False leaves the row index out of the files.
df_api.to_csv(path_or_buf="users_api.csv", index=False)
df_csv.to_csv(path_or_buf="iris_csv.csv", index=False)


COVID-19

In [None]:
!pip install requests pandas -q


In [None]:
# Fetch per-country COVID-19 statistics from the disease.sh API and load them
# into a flat DataFrame.
import requests
import pandas as pd

# API endpoint
url = "https://disease.sh/v3/covid-19/countries"

# Send request; the timeout stops the cell from hanging if the server stalls.
response = requests.get(url, timeout=30)

# Check if response is successful
if response.status_code == 200:
    data = response.json()
    # Normalize JSON data into a flat table
    df_covid = pd.json_normalize(data)
    print("COVID-19 Data Loaded Successfully\n")
    print(df_covid.head())
else:
    # df_covid is not defined on this path, so cells that use it will fail.
    print("Failed to retrieve data. Status code:", response.status_code)


COVID-19 Data Loaded Successfully

         updated      country   cases  todayCases  deaths  todayDeaths  \
0  1754300676200  Afghanistan  234174           0    7996            0   
1  1754300676192      Albania  334863           0    3605            0   
2  1754300676195      Algeria  272010           0    6881            0   
3  1754300676243      Andorra   48015           0     165            0   
4  1754300676222       Angola  107327           0    1937            0   

   recovered  todayRecovered  active  critical  ...  oneTestPerPeople  \
0     211080               0   15098         0  ...                29   
1     330233               0    1025         0  ...                 1   
2     183061               0   82068         0  ...               196   
3          0               0   47850         0  ...                 0   
4     103419               0    1971         0  ...                23   

   activePerOneMillion  recoveredPerOneMillion  criticalPerOneMillion  \
0       

In [None]:
# Summarize the COVID-19 table: column names, the info() report, and
# per-column missing-value counts.
print("Columns:", df_covid.columns.tolist())
print("\nData Info:")
# info() prints its report itself and returns None; call it directly so no
# stray "None" line is printed after the report.
df_covid.info()

print("\nMissing Values:")
print(df_covid.isnull().sum())


Columns: ['updated', 'country', 'cases', 'todayCases', 'deaths', 'todayDeaths', 'recovered', 'todayRecovered', 'active', 'critical', 'casesPerOneMillion', 'deathsPerOneMillion', 'tests', 'testsPerOneMillion', 'population', 'continent', 'oneCasePerPeople', 'oneDeathPerPeople', 'oneTestPerPeople', 'activePerOneMillion', 'recoveredPerOneMillion', 'criticalPerOneMillion', 'countryInfo._id', 'countryInfo.iso2', 'countryInfo.iso3', 'countryInfo.lat', 'countryInfo.long', 'countryInfo.flag']

Data Info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 231 entries, 0 to 230
Data columns (total 28 columns):
 #   Column                  Non-Null Count  Dtype  
---  ------                  --------------  -----  
 0   updated                 231 non-null    int64  
 1   country                 231 non-null    object 
 2   cases                   231 non-null    int64  
 3   todayCases              231 non-null    int64  
 4   deaths                  231 non-null    int64  
 5   todayDeaths       