# Import libraries


In [49]:
import requests
import json
import pandas as pd
from pandas.io.json import json_normalize
import numpy as np
import geopy
from geopy.geocoders import Nominatim
from geopy.point import Point
from cryptography.fernet import Fernet
from datetime import datetime
from google.oauth2.service_account import Credentials
from google.cloud import bigquery
from google.oauth2 import service_account
import pandas_gbq







# Generate token to access API

In [50]:


# Request headers: JSON in, JSON out.
headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json',
}

# Payload identifying the user the token is issued for.
json_data = {
    'full_name': 'Fabiano Moreira Alves',
    'email': 'fabianomalves@proton.me',
}

# A timeout keeps a stalled server from hanging the run, and raise_for_status()
# stops an HTTP error body from being parsed as if it were a token response.
access_token = requests.post(
    'https://begrowth.deta.dev/user/',
    headers=headers,
    json=json_data,
    timeout=30,
)
access_token.raise_for_status()
access_token_json = access_token.json()

print(access_token_json)

{'user': 'fabianomalves@proton.me', 'API Token': 'BGEU2QUMX97D'}


# Split the response dictionary into keys and values to get the access token

In [None]:
# Split the response dictionary into parallel lists of keys and values.
keys = list(access_token_json)
values = list(access_token_json.values())

# Read the token by its key instead of by position: the last value is only the
# token as long as the API keeps returning 'API Token' as the final key.
# A missing key now fails loudly with KeyError instead of yielding a wrong token.
string_acess_token = str(access_token_json['API Token'])
print(string_acess_token)


# Consume the API by calling the endpoint https://begrowth.deta.dev/token=access_token, concatenating the URL with the token. Then create a DataFrame from the JSON and print the result.

In [None]:
# Endpoint prefix; the token is appended directly after "token=".
url_dev = "https://begrowth.deta.dev/token="
url_dev_with_token = f"{url_dev}{string_acess_token}"
print(url_dev_with_token)

# Normalize the json data into pandas Data Frame

In [None]:
# Fetch the payload with a timeout and fail fast on an HTTP error, so an error
# page is never handed to the JSON parser or flattened into the DataFrame.
response = requests.get(url_dev_with_token, timeout=30)
response.raise_for_status()
data = response.json()

# Flatten the nested JSON records into a flat table.
df_json_normalize = pd.json_normalize(data)
print(df_json_normalize)


# Find duplicate rows and filtering them

In [None]:
# Mark every row whose 'id' has already appeared earlier in the frame,
# then keep only those repeated rows for inspection.
is_repeated_id = df_json_normalize.duplicated(subset=['id'])
duplicate_rows = df_json_normalize[is_repeated_id]
print(duplicate_rows)


# Drop duplicate rows, filtering by 'id'

In [None]:
# Keep the first occurrence of each 'id' and discard the later repeats.
df_distinct_id_rows = df_json_normalize.drop_duplicates(
    subset=['id'],
    keep='first',
)
print(df_distinct_id_rows)


# Do the reverse geocoding and create the address state column for the distinct rows

In [None]:
# Create a geocoder object using the Nominatim API
geolocator = Nominatim(user_agent="my_geocoder_state")


def get_state(lat, lng):
    """Reverse geocode a coordinate pair and return the state name.

    Args:
        lat: Latitude of the point.
        lng: Longitude of the point.

    Returns:
        The ``state`` entry of the geocoder's address, or ``None`` when the
        geocoder finds nothing for the point or the address has no state.
    """
    location = geolocator.reverse((lat, lng))
    # reverse() yields None when no result is found; returning None here keeps
    # one unresolvable row from aborting the whole DataFrame.apply run.
    if location is None:
        return None
    # Not every address carries a 'state' entry, so use .get() to avoid KeyError.
    return location.raw.get('address', {}).get('state')

# Apply the function to each row of the DataFrame and store the result in a new column.
# Work on an explicit copy so the new column is not written onto a frame derived
# from another one (which can trigger SettingWithCopyWarning), and use plain
# single-column assignment rather than a list-keyed .loc to create the column.
df_distinct_id_rows = df_distinct_id_rows.copy()
df_distinct_id_rows['address_state'] = df_distinct_id_rows.apply(
    lambda row: get_state(row['address.geo_latitude'], row['address.geo_longitude']),
    axis=1,
)

# Print the resulting DataFrame
df_distinct_id_rows.head()



# Saving dataframe into csv file, for preventing new deletions

In [None]:
# Checkpoint the geocoded frame to disk. The index is written as well (to_csv
# default), which is why a later cell removes the resulting 'Unnamed' columns.
distinct_rows_csv = '../data_engineer_test/df_distinct_id_rows.csv'
df_distinct_id_rows.to_csv(distinct_rows_csv)

# Read csv file

In [None]:
# Reload the checkpoint written by the previous cell and preview the first rows.
df_distinct_id_rows = pd.read_csv(
    '../data_engineer_test/df_distinct_id_rows.csv',
)
df_distinct_id_rows.head(n=5)

# Decrypt the CPF column

In [None]:
# NOTE(review): the Fernet key is hard-coded in the source; consider loading it
# from an environment variable or a secret store instead.
fernet = Fernet(b'ekkxXo0uHWRkIbHqHrLS4gaMj2hWTYMJyPTAbi9INGI=')


def _decrypt_cpf(token):
    """Decrypt one CPF token given as text and return the decrypted payload."""
    return fernet.decrypt(token.encode())


# Replace each encrypted CPF with its decrypted value.
df_distinct_id_rows['cpf'] = df_distinct_id_rows['cpf'].apply(_decrypt_cpf)

In [None]:
# Preview the first rows to check the decrypted 'cpf' column.
df_distinct_id_rows.head(n=5)

# Save the decrypted data result to a csv file

In [None]:

# Persist the frame with the decrypted CPF column (the index is written too).
decrypted_cpf_csv = '../data_engineer_test/df_decrypt_cpf.csv'
df_distinct_id_rows.to_csv(decrypted_cpf_csv)


# Read the csv decrypted data

In [None]:
# Load the decrypted data back from disk and preview the first rows.
df_decrypt_cpf = pd.read_csv(
    '../data_engineer_test/df_decrypt_cpf.csv',
)
df_decrypt_cpf.head(n=5)
# To inspect column dtypes, run: df_decrypt_cpf.info()

# Formatting the cpf column

In [None]:
def _strip_bytes_repr(value):
    """Return *value* without a surrounding b'...' / b"..." wrapper, if it has one."""
    if isinstance(value, str) and len(value) >= 3 and value[0] == 'b':
        quote = value[1]
        if quote in ('"', "'") and value[-1] == quote:
            return value[2:-1]
    return value


# After the CSV round trip each CPF is the text form of a bytes object (b'...').
# Strip that wrapper only when it is actually present, so re-running this cell
# (or feeding already-clean values) never chops real characters off the CPF.
df_decrypt_cpf['cpf'] = df_decrypt_cpf['cpf'].apply(_strip_bytes_repr)


In [None]:
# Preview the first rows to check the cleaned 'cpf' values.
df_decrypt_cpf.head(n=5)

# Inserting columns 

In [None]:
# Take a single timestamp so every row receives the same insertion time.
now = datetime.now()

# Constant columns added to every row: insertion time and candidate name.
audit_columns = {
    'dt_insert': now,
    'candidate_name': 'Fabiano Moreira Alves',
}
df_decrypt_cpf = df_decrypt_cpf.assign(**audit_columns)


In [None]:
# Preview the first rows to check the newly added columns.
df_decrypt_cpf.head(n=5)

# Removing Unnamed columns

In [None]:
# Remove the index columns picked up by the CSV round trips. errors='ignore'
# makes the cell safe to re-run and tolerant of a CSV that was written without
# its index, where `del` would raise KeyError.
df_decrypt_cpf = df_decrypt_cpf.drop(
    columns=['Unnamed: 0', 'Unnamed: 0.1'],
    errors='ignore',
)
df_decrypt_cpf.head()

# Rename the columns to use '_' instead of '.'.

In [None]:
# Map every column name to the same name with each '.' replaced by '_'.
underscored_names = {
    name: name.replace('.', '_') for name in df_decrypt_cpf.columns
}
df_decrypt_cpf_ready = df_decrypt_cpf.rename(columns=underscored_names)

df_decrypt_cpf_ready.head(n=5)


# Saving the final transformed dataframe into df_decrypt_cpf_ready.csv

In [None]:
# Persist the final transformed frame (the index is written as the first column).
ready_csv = '../data_engineer_test/df_decrypt_cpf_ready.csv'
df_decrypt_cpf_ready.to_csv(ready_csv)

# Create a service account with the key provided in email

In [None]:
# Location of the service-account key file.
key_path = '../data_engineer_test/svc-data-engineer-test.json'

# Build the credentials from the key file, then a BigQuery client that uses them.
credentials = service_account.Credentials.from_service_account_file(key_path)
client = bigquery.Client(credentials=credentials)

print(client)
    

# Read the transform csv file

In [None]:
# Reload the final CSV for upload.
# - index_col=0: the file was written with its index, so the first column is the
#   saved index, not data; without this it comes back as an 'Unnamed: 0' column.
# - dtype str for 'cpf': keeps the CPF as text so leading zeros are not lost to
#   integer inference.
df_decrypt_cpf_ready = pd.read_csv(
    '../data_engineer_test/df_decrypt_cpf_ready.csv',
    index_col=0,
    dtype={'cpf': str},
)



In [None]:
# Set the project and dataset ID

project_id = 'begrowth-user-api-demo'
dataset_id = 'bg_users'
table_id = 'bg_data_enginner_test_fabiano_moreira'


def create_bigquery_table(df, dataset_tablename, gcp_project_name, *,
                          credentials=None, if_exists="replace"):
    """Upload *df* to a BigQuery table.

    The function now uses its arguments instead of a module-level DataFrame
    and hard-coded table and project names.

    Args:
        df: pandas DataFrame to upload.
        dataset_tablename: Destination in the form ``dataset_name.tablename``.
        gcp_project_name: GCP project ID that owns the dataset.
        credentials: Optional credentials object passed through to ``to_gbq``;
            ``None`` keeps ``to_gbq``'s default authentication.
        if_exists: What to do when the table already exists:
            ``"fail"``, ``"replace"`` (default) or ``"append"``.
    """
    df.to_gbq(
        destination_table=dataset_tablename,
        project_id=gcp_project_name,
        if_exists=if_exists,
        credentials=credentials,
    )





In [None]:
project_id = 'begrowth-user-api-demo'
dataset_id = 'bg_users'
table_id = 'bg_data_enginner_test_fabiano_moreira'

# Fully qualified table name (project.dataset.table), back-quoted in the query.
full_table_name = f"{project_id}.{dataset_id}.{table_id}"
query = f"SELECT * FROM `{full_table_name}`"

# Run the query with the service-account credentials and collect the rows
# into a pandas DataFrame.
df = pd.read_gbq(query, project_id=project_id, credentials=credentials)

print(df)

In [None]:
project_id = 'begrowth-user-api-demo'
dataset_id = 'bg_users'
# NOTE(review): this table name ends in '_alves', unlike the
# 'bg_data_enginner_test_fabiano_moreira' table used in the earlier cells;
# confirm which table is meant to be deleted.
table_id = 'bg_data_enginner_test_fabiano_moreira_alves'

# Fresh BigQuery client using the service-account credentials.
client = bigquery.Client(credentials=credentials)

# Look the table up by its fully qualified ID, then delete it.
full_table_id = f"{project_id}.{dataset_id}.{table_id}"
table = client.get_table(full_table_id)
client.delete_table(table)