In [1]:
import requests
import pandas as pd

import numpy as np

## Extract

In [44]:
# Base endpoint of the Random User Generator API.
url = 'https://randomuser.me/api/'

# Query string that selects how many users one call returns.
num_of_users = '?results=100'

# Append the query string to the endpoint to form the full request URL.
url = ''.join((url, num_of_users))

print(url)

https://randomuser.me/api/?results=100


The two most commonly used HTTP methods when calling an API are GET (retrieve data) and POST (send data); this notebook only uses GET.

In [45]:
# Call the API with a GET request and keep the response object.
# requests has no default timeout, so without one a stalled connection would
# block this cell indefinitely; 30 seconds is generous for a 100-user payload.
response = requests.get(url, timeout=30)


In [46]:
# Display the response object; its repr shows the HTTP status code.
# 200 means success, 404 means not found, 500 means an internal server error.
response

<Response [200]>

In [47]:
# Stop here on an HTTP error (4xx/5xx) instead of silently skipping the
# assignment: otherwise `data` would stay undefined (or keep the value from an
# earlier run) and the cells below would fail or process stale results.
response.raise_for_status()

# Parse the JSON body into a Python dict.
data = response.json()

In [48]:
# Show the parsed payload: a dict whose 'results' key holds the list of user records.
data

{'results': [{'gender': 'male',
   'name': {'title': 'Mr', 'first': 'Jordan', 'last': 'Cooper'},
   'location': {'street': {'number': 5968, 'name': 'Pine Hill Road'},
    'city': 'Upper Hutt',
    'state': 'Canterbury',
    'country': 'New Zealand',
    'postcode': 73563,
    'coordinates': {'latitude': '53.4953', 'longitude': '-9.3661'},
    'timezone': {'offset': '+11:00',
     'description': 'Magadan, Solomon Islands, New Caledonia'}},
   'email': 'jordan.cooper@example.com',
   'login': {'uuid': '91f9ad2b-b2c7-4b77-ad2e-fa4d2fab5faf',
    'username': 'blacktiger984',
    'password': 'sites',
    'salt': 'UhWDUBD3',
    'md5': '970c50a6eda12eef6e2d39a49c62dd25',
    'sha1': '6f8e50b1d965b54bf0f57f72baed2fb3efe0c529',
    'sha256': '0b4a8e998e4457e10422e32cb9542a732bf5ce2a15e85e590ada4c093e7c85e7'},
   'dob': {'date': '1981-08-30T08:11:44.967Z', 'age': 43},
   'registered': {'date': '2008-03-05T03:44:24.166Z', 'age': 16},
   'phone': '(196)-739-6917',
   'cell': '(230)-965-5933',
   

In [56]:
# Flatten the top-level JSON object into a one-row DataFrame. Only the 'info'
# dict is expanded into columns (info.seed, info.results, ...); the list under
# 'results' stays as a single nested cell.
df = pd.json_normalize(data)

In [57]:
# Display the flattened frame: one row, with the user list still nested in 'results'.
df

Unnamed: 0,results,info.seed,info.results,info.page,info.version
0,"[{'gender': 'male', 'name': {'title': 'Mr', 'f...",eaa8df8036b8a0d8,100,1,1.4


In [61]:
# Each cell of the 'results' column holds the nested list of user records;
# print every cell as-is to inspect the raw structure.
for nested_records in df['results']:
    print(nested_records)

[{'gender': 'male', 'name': {'title': 'Mr', 'first': 'Jordan', 'last': 'Cooper'}, 'location': {'street': {'number': 5968, 'name': 'Pine Hill Road'}, 'city': 'Upper Hutt', 'state': 'Canterbury', 'country': 'New Zealand', 'postcode': 73563, 'coordinates': {'latitude': '53.4953', 'longitude': '-9.3661'}, 'timezone': {'offset': '+11:00', 'description': 'Magadan, Solomon Islands, New Caledonia'}}, 'email': 'jordan.cooper@example.com', 'login': {'uuid': '91f9ad2b-b2c7-4b77-ad2e-fa4d2fab5faf', 'username': 'blacktiger984', 'password': 'sites', 'salt': 'UhWDUBD3', 'md5': '970c50a6eda12eef6e2d39a49c62dd25', 'sha1': '6f8e50b1d965b54bf0f57f72baed2fb3efe0c529', 'sha256': '0b4a8e998e4457e10422e32cb9542a732bf5ce2a15e85e590ada4c093e7c85e7'}, 'dob': {'date': '1981-08-30T08:11:44.967Z', 'age': 43}, 'registered': {'date': '2008-03-05T03:44:24.166Z', 'age': 16}, 'phone': '(196)-739-6917', 'cell': '(230)-965-5933', 'id': {'name': '', 'value': None}, 'picture': {'large': 'https://randomuser.me/api/portraits

In [62]:
# One independent accumulator list per user attribute; the extraction loop
# appends to these, and the DataFrame is later built from them.
(first_name_ls, last_name_ls, emails_ls,
 genders_ls, countries_ls, age_ls) = ([] for _ in range(6))

In [63]:
# Loop through each user and extract the relevant details.

# Empty the accumulators first so that re-running this cell does not append
# the same users a second time.
for field_values in (first_name_ls, last_name_ls, emails_ls,
                     genders_ls, countries_ls, age_ls):
    field_values.clear()

for user in data['results']:
    # A nested object may be missing or null; fall back to an empty dict so the
    # lookups below yield np.nan instead of raising KeyError/AttributeError.
    name_info = user.get('name') or {}
    location_info = user.get('location') or {}
    dob_info = user.get('dob') or {}

    email = user.get('email', np.nan)  # np.nan marks a value the API did not supply
    emails_ls.append(email)

    first_name = name_info.get('first', np.nan)
    first_name_ls.append(first_name)

    last_name = name_info.get('last', np.nan)
    last_name_ls.append(last_name)

    gender = user.get('gender', np.nan)
    genders_ls.append(gender)

    country = location_info.get('country', np.nan)
    countries_ls.append(country)

    age = dob_info.get('age', np.nan)
    age_ls.append(age)
    

In [11]:
# Pair each output column name with its accumulator list, then build the
# table in one step (one row per user, columns in the order listed here).
column_names = ('first_name', 'last_name', 'Email', 'Gender', 'Country', 'age')
column_values = (first_name_ls, last_name_ls, emails_ls,
                 genders_ls, countries_ls, age_ls)
df = pd.DataFrame(dict(zip(column_names, column_values)))


In [13]:
# Preview the first five extracted users.
df.head()

Unnamed: 0,first_name,last_name,Email,Gender,Country,age
0,Brigitte,Dost,brigitte.dost@example.com,female,Germany,67
1,Luk'yan,Kozachinskiy,luk'yan.kozachinskiy@example.com,male,Ukraine,77
2,Anne-Kathrin,Zitzmann,anne-kathrin.zitzmann@example.com,female,Germany,24
3,Marlon,Van de Kleut,marlon.vandekleut@example.com,female,Netherlands,54
4,Scarlett,Harper,scarlett.harper@example.com,female,United Kingdom,73


## Load

Before running this step, create the schema and an empty target table in the MySQL database:

```create schema users;```

```
create table users.user_details 
(

first_name varchar(500),
last_name varchar(500),
Email varchar(500),
Gender varchar(100),
Country varchar(500),
age varchar(100)
)
```


In [2]:
import os
from getpass import getpass

import mysql.connector

# Never keep database credentials in the notebook: read them from the
# environment (MYSQL_HOST / MYSQL_USER / MYSQL_PASSWORD) and prompt for the
# password when it is not set. `password` replaces the deprecated `passwd` alias.
mydb = mysql.connector.connect(
    host=os.environ.get('MYSQL_HOST', 'localhost'),
    user=os.environ.get('MYSQL_USER', 'root'),
    password=os.environ.get('MYSQL_PASSWORD') or getpass('MySQL password: '),
    database='users',
)

In [3]:
# Cursor used below to run the INSERT statements and call the stored procedure.
cursordb= mydb.cursor()

In [41]:
# Column order must match the placeholders in the INSERT statement below.
insert_columns = ['first_name', 'last_name', 'Email', 'Gender', 'Country', 'age']
sql_query = "insert into user_details (first_name,last_name,Email,Gender,Country,age) values (%s,%s,%s,%s,%s,%s)"

# Cast to object dtype and replace NaN with None so missing values are sent
# to MySQL as NULL rather than as a float NaN.
load_frame = df[insert_columns].astype(object)
load_frame = load_frame.where(load_frame.notna(), None)
rows_to_insert = list(load_frame.itertuples(index=False, name=None))

# One batched call instead of one execute() per row; skipped when there is
# nothing to insert.
if rows_to_insert:
    cursordb.executemany(sql_query, rows_to_insert)
    mydb.commit()  # make the inserted rows durable before reporting success
print("data loaded successfully")

data loaded successfully


### Create stored procedure in sql to clean the data

```
DELIMITER //
-- Rebuilds users.user_transformed from users.user_details, adding the e-mail
-- host name and an age bucket for every user.
CREATE PROCEDURE users.user_data_transformation ()
BEGIN
-- Start from a clean output table on every call.
drop table if exists users.user_transformed;
create table users.user_transformed AS
SELECT first_name, last_name, email,
 -- everything after the '@' is the mail host
 SUBSTRING(email, LOCATE('@', email) + 1) AS hostname,
 gender, country,
 -- Branches are evaluated top to bottom, so each one only needs its upper
 -- bound. A missing age gets its own label instead of being counted as
 -- 'below 18'.
 case when age is null then 'unknown'
 when age<=18 then 'below 18'
 when age<=35 then '18-35'
 when age<=65 then '35-65'
 else 'above 65'
 end as age_bucket
  FROM   users.user_details;
END //
DELIMITER ;
```

In [5]:
# Call the stored procedure that rebuilds users.user_transformed from
# users.user_details (it takes no arguments, hence the empty tuple output).
cursordb.callproc('user_data_transformation')

()

In [42]:
# Commit the pending changes on this connection so they are persisted,
# then close the connection.
mydb.commit() # your changes will be reflected after commit
mydb.close()

## now use vscode to create .py file

**step 1** empty the tables: truncate the `users.user_details` table and drop `users.user_transformed`, the output table of the stored procedure
<br>**step 2** copy the same code into a `.py` file in VS Code, and also add `print` statements and `time.sleep` calls
<br> **step 3** open the cmd 
<br> **step 4** change directory to D if required using command
```D:```
<br> **step 5** change location cd Documents\DAS\KRPRO_Python\24012024\Projects\ETL
<br> **step 6** using ```dir``` you can see contents in folder
<br> **step 7** install Python and the libraries the script imports, for example: ```pip install requests pandas numpy mysql-connector-python```
<br> **step 8** now run the script using ```python api_script.py```