# Import libraries and generate token


In [69]:
import requests
import json
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import geopy
from geopy.geocoders import Nominatim
from geopy.point import Point
from cryptography.fernet import Fernet


headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json',
}

json_data = {
    'full_name': 'Fabiano Moreira Alves',
    'email': 'fabianomalves@proton.me',
}

access_token = requests.post('https://begrowth.deta.dev/user/', headers=headers, json=json_data)
access_token_json = access_token.json()

print(access_token_json)


{'user': 'fabianomalves@proton.me', 'API Token': 'BGO0C05S5J1S'}


# Split the user and key as dictionary to get the acess token

In [70]:
# split dictionary into keys and values
keys = []
values = []
items = access_token_json.items()
for item in items:
    keys.append(item[0]), values.append(item[1])
 
# printing keys and values separately
string_acess_token = str(values[-1])
print(string_acess_token)


BGO0C05S5J1S


# Consume the API, calling the endpoint https://begrowth.deta.dev/token=access_token, concatenatating the url with the token. Then, create a dataframe for the jason and print data frame result.

In [71]:
url_dev = "https://begrowth.deta.dev/token="
url_dev_with_token = url_dev + string_acess_token
print(url_dev_with_token)


https://begrowth.deta.dev/token=BGO0C05S5J1S


# Normalize the json data into pandas Data Frame

In [72]:
data = json.loads(requests.get(url_dev_with_token).text)
df_json_normalize = pd.json_normalize(data)
print(df_json_normalize)


        id   first_name   last_name                         email  gender  \
0        1       Kellen     Cowherd            kcowherd0@nasa.gov    Male   
1        2     Josefina     Swalowe        jswalowe1@slashdot.org  Female   
2        3    Priscilla    Prickett           pprickett2@webs.com  Female   
3        3    Priscilla    Prickett           pprickett2@webs.com  Female   
4        4        Leroi    Spinello          lspinello3@google.fr    Male   
...    ...          ...         ...                           ...     ...   
1015   997        Boote      Downse  bdownsero@huffingtonpost.com    Male   
1016   979       Shelly    Storrock        sstorrockr6@census.gov  Female   
1017   998       Cindie  Riddlesden    criddlesdenrp@hubpages.com  Female   
1018   999     Roderick     Alfonso      ralfonsorq@quantcast.com    Male   
1019  1000  Konstantine      Perell         kperellrr@blogger.com    Male   

             utm                                                cpf  \
0   

# Find duplicate rows and filtering them

In [73]:
duplicate_rows = df_json_normalize[df_json_normalize.duplicated(['id', ])]
print(duplicate_rows)


       id first_name    last_name                         email  gender  \
3       3  Priscilla     Prickett           pprickett2@webs.com  Female   
42     25       Flin  Friedenbach       ffriedenbacho@google.pl    Male   
64     62    Ermanno       Golson        egolson1p@so-net.ne.jp    Male   
82     51     Archer       Moreno            amoreno1e@yale.edu    Male   
109    91        Ara     Carrabot     acarrabot2i@wordpress.org  Female   
195   106      Parke       Wooles       pwooles2x@google.com.au    Male   
256   233   Veronika         Skae            vskae6g@meetup.com  Female   
273   266      Weber       Priest            wpriest7d@java.com    Male   
310   273     Trevor     Yosevitz      tyosevitz7k@redcross.org    Male   
360   336    Vanessa       Allsep        vallsep9b@amazon.co.jp  Female   
429   419     Mortie         Lots             mlotsbm@imgur.com    Male   
478   413    Tiphany     Endricci          tendriccibg@cnet.com  Female   
520   497     Ramsey     

# Drop duplicate rows, filtering by 'id'

In [74]:
df_distinct_id_rows = df_json_normalize.drop_duplicates(subset=['id'])
print(df_distinct_id_rows)


        id   first_name    last_name                         email  gender  \
0        1       Kellen      Cowherd            kcowherd0@nasa.gov    Male   
1        2     Josefina      Swalowe        jswalowe1@slashdot.org  Female   
2        3    Priscilla     Prickett           pprickett2@webs.com  Female   
4        4        Leroi     Spinello          lspinello3@google.fr    Male   
5        5     Bellanca      Laycock       blaycock4@amazonaws.com  Female   
...    ...          ...          ...                           ...     ...   
1014   996        Bride  Grzegorczyk     bgrzegorczykrn@flavors.me  Female   
1015   997        Boote       Downse  bdownsero@huffingtonpost.com    Male   
1017   998       Cindie   Riddlesden    criddlesdenrp@hubpages.com  Female   
1018   999     Roderick      Alfonso      ralfonsorq@quantcast.com    Male   
1019  1000  Konstantine       Perell         kperellrr@blogger.com    Male   

             utm                                               

# Doing the reverse geocoding and creating address field for the singles rows

In [86]:
# Create a geocoder object using the Nominatim API
geolocator = Nominatim(user_agent="my_geocoder_state")

# Define a function to reverse geocode the state
def get_state(lat, lng):
  # Use the geocoder object to reverse geocode the coordinates
  location = geolocator.reverse((lat, lng))
  # Extract the state from the response
  state = location.raw['address']['state']
  return state

# Apply the function to each row of the DataFrame and store the result in a new column

df_distinct_id_rows.loc[:, ['address_state']] == df_distinct_id_rows.apply(lambda x: get_state(x['address.geo_latitude'], x['address.geo_longitude']), axis=1)



# Print the resulting DataFrame
print(df_distinct_id_rows)






        id   first_name    last_name                         email  gender  \
0        1       Kellen      Cowherd            kcowherd0@nasa.gov    Male   
1        2     Josefina      Swalowe        jswalowe1@slashdot.org  Female   
2        3    Priscilla     Prickett           pprickett2@webs.com  Female   
4        4        Leroi     Spinello          lspinello3@google.fr    Male   
5        5     Bellanca      Laycock       blaycock4@amazonaws.com  Female   
...    ...          ...          ...                           ...     ...   
1014   996        Bride  Grzegorczyk     bgrzegorczykrn@flavors.me  Female   
1015   997        Boote       Downse  bdownsero@huffingtonpost.com    Male   
1017   998       Cindie   Riddlesden    criddlesdenrp@hubpages.com  Female   
1018   999     Roderick      Alfonso      ralfonsorq@quantcast.com    Male   
1019  1000  Konstantine       Perell         kperellrr@blogger.com    Male   

             utm                                               

  df_distinct_id_rows.loc[:, ['address_state']] == df_distinct_id_rows.apply(lambda x: get_state(x['address.geo_latitude'], x['address.geo_longitude']), axis=1)


Saving dataframe into csv file

In [None]:
df_distinct_id_rows.to_csv('../data_engineer_test/df_distinct_id_rows.csv')

Read csv file

In [None]:
df_distinct_id_rows = pd.read_csv('../data_engineer_test/df_distinct_id_rows.csv')
df_distinct_id_rows.head()

Decript the CPF column