# StackOverflow - Exploratory Data Analysis

This notebook performs an exploratory data analysis (EDA) of the BigQuery public dataset `bigquery-public-data.stackoverflow` to uncover interesting patterns and potential use cases in the data.

# Setup Notebook

## Imports

In [1]:
# Import Standard Libraries
import os
from pathlib import Path
from dynaconf import Dynaconf

# Import Package Modules
from src.bigquery_connector.bigquery_connector import BigQueryConnector
from src.types import BigQueryClientConfig

## Define Configurations

In [2]:
# Setup root path
root_path = Path(os.getcwd()).parents[2]

In [3]:
# Load the project settings file, selecting the 'eda' environment
settings_file = root_path / 'configuration' / 'settings.toml'
config = Dynaconf(
    settings_files=[settings_file],
    environments=True,
    env='eda',
)

In [4]:
# Extract the configuration sections used below:
# the BigQuery client settings and the relevant-users query settings
bigquery_client_config, relevant_users_config = (
    config['bigquery_client'],
    config['relevant_users'],
)

# Read Data

In [5]:
# Wrap the raw settings in the typed client config, then instantiate the connector with it
client_config = BigQueryClientConfig(**bigquery_client_config)
bigquery_connector = BigQueryConnector(client_config)

[10/30/2024 20:32:23 - BigQueryConnector] INFO - _set_client - Set the BigQuery client


In [6]:
# Read the relevant-users data from BigQuery using the query configuration loaded above.
# NOTE(review): the result is used like a pandas DataFrame further down (`.sample(5)`) — confirm the return type.
relevant_users = bigquery_connector.read_from_query_config(relevant_users_config)

[10/30/2024 20:32:24 - BigQueryConnector] INFO - read_from_query_config - Reading query file: queries/stackoverflow/eda/users/relevant_users.sql
[10/30/2024 20:32:24 - general_utils] INFO - read_file_from_path - Reading file from /Users/s.porreca/Projects/DruidicGroveAI/queries/stackoverflow/eda/users/relevant_users.sql
[10/30/2024 20:32:24 - general_utils] INFO - read_file_from_path - Successfully file read from /Users/s.porreca/Projects/DruidicGroveAI/queries/stackoverflow/eda/users/relevant_users.sql
[10/30/2024 20:32:24 - BigQueryConnector] INFO - build_bigquery_query_parameters_from_dictionary - Fetch BigQuery query parameters
[10/30/2024 20:32:24 - BigQueryConnector] INFO - build_bigquery_query_parameters_from_dictionary - Successfully built BigQuery query parameters
[10/30/2024 20:32:24 - BigQueryConnector] INFO - read_from_query_config - Querying BigQuery with Parameters
[10/30/2024 20:32:26 - BigQueryConnector] INFO - read_from_query_config - Successfully retrieve data
[10/30/



In [7]:
# Preview a handful of random rows. A fixed seed keeps the preview identical
# across "Restart Kernel -> Run All" executions (assumes a pandas DataFrame).
# NOTE(review): the rendered rows contain user profile fields — review before sharing the notebook.
relevant_users.sample(5, random_state=42)

Unnamed: 0,id,display_name,about_me,age,creation_date,last_access_date,location,reputation,up_votes,down_votes,views,profile_image_url,website_url
10575,10256981,DarkCoder,"<p>Hi, I am an Engineering student and I am Le...",,2018-08-21 22:21:30.070000+00:00,2022-09-25 05:29:45.137000+00:00,"Rourkela, Odisha, India",43,5,0,28,https://graph.facebook.com/1815251615232181/pi...,https://akashlakra789.wixsite.com/home
17837,14151211,Akram Ashraf,,,2020-08-23 07:45:50.957000+00:00,2022-09-25 05:09:15.680000+00:00,Egypt,11,0,0,11,https://i.stack.imgur.com/GcaFf.jpg,
32866,10821659,Zachary Perkins,,,2018-12-21 20:04:05.980000+00:00,2022-09-25 05:46:20.920000+00:00,"San Antonio, TX, USA",91,74,0,11,https://www.gravatar.com/avatar/f52f3480dcd556...,http://zperk.net
29313,12999499,Sekoni,,,2020-03-03 12:27:33.670000+00:00,2022-09-25 04:02:48.267000+00:00,Nigeria,1,0,0,2,https://i.stack.imgur.com/jptZg.png,
28408,973447,Daniel,<p>Mango cowboy disco jock</p>,,2011-09-30 16:12:33.753000+00:00,2022-09-24 22:59:25.583000+00:00,"Huntsville, AL, United States",2973,546,3,167,,http://heatertech.com
