In [1]:
import os
import pandas as pd
import pyarrow
from google.cloud import bigquery

# Create the new DataSet

In [6]:
# Define a function to create a new dataset in BigQuery
def create_dataset(project_id, dataset_id):
    # Set the environment variable to point to your service account key file
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "key-file.json"

    # Initialize the BigQuery client
    client = bigquery.Client(project=project_id)

    # Construct a full Dataset object to send to the API
    dataset = bigquery.Dataset(f"{project_id}.{dataset_id}")

    # Specify the geographic location where the dataset should reside
    dataset.location = "US"

    try:
        # Send the dataset to the API for creation
        dataset = client.create_dataset(dataset, timeout=30)
        print(f"Dataset {dataset.dataset_id} created.")
    except Exception as e:
        print(f"Failed to create dataset: {e}")

In [9]:
# Create variables for the project id and the new dataset id
project_id = "project-3-415202"
dataset_id = "seismic_activity_and_injection_wells"


In [10]:
# Pass the function to create the data set in Big Query
create_dataset(project_id, dataset_id)

Dataset seismic_activity_and_injection_wells created.


# Creating tables in the DataSet

## Define a function to create the tables

In [13]:
# Define a function to create a new table in BigQuery
def create_table(project_id, dataset_id, table_id, schema):
    # Set the environment variable to point to your service account key file
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "key-file.json"

    # Initialize the BigQuery client
    client = bigquery.Client(project=project_id)

    # Define the dataset reference
    dataset_ref = client.dataset(dataset_id)

    # Define the table schema
    table_schema = []
    for field in schema:
        table_schema.append(bigquery.SchemaField(field['name'], field['type']))

    # Define the table reference
    table_ref = dataset_ref.table(table_id)

    # Define the table object
    table = bigquery.Table(table_ref, schema=table_schema)

    try:
        # Send the table to the API for creation
        table = client.create_table(table) 
        print(f"Table {table_id} created.")
    except Exception as e:
        print(f"Failed to create table: {e}")

### Create the earthquakes table

In [16]:
# Create variables for the earthquakes table
project_id = "project-3-415202"
dataset_id = "seismic_activity_and_injection_wells"
table_id = "earthquakes"
schema = [
    {"name": "Latitude", "type": "NUMERIC"},
    {"name": "Longitude", "type": "NUMERIC"},
    {"name": "Magnitude", "type": "NUMERIC"},
    {"name": "Event_Date", "type": "DATE"}
]

In [17]:
# Create the table
create_table(project_id, dataset_id, table_id, schema)

Table earthquakes created.


### Create the injectionVolumes table

In [20]:
# Create variables for the injectionVolumes table
project_id = "project-3-415202"
dataset_id = "seismic_activity_and_injection_wells"
table_id = "injectionVolumes"
schema = [
    {"name": "API Number", "type": "INTEGER"},
    {"name": "Surface Longitude", "type": "NUMERIC"},
    {"name": "Surface Latitude", "type": "NUMERIC"},
    {"name": "Injection Date", "type": "DATE"},
    {"name": "Injection End Date", "type": "DATE"},
    {"name": "Volume Injected: BBLs", "type": "NUMERIC"}
]

In [21]:
# Create the table
create_table(project_id, dataset_id, table_id, schema)

Table injectionVolumes created.


### Create the pressureData table

In [27]:
# Create variables for the injectionVolumes table
project_id = "project-3-415202"
dataset_id = "seismic_activity_and_injection_wells"
table_id = "pressureData"
schema = [
    {"name": "Time", "type": "DATE"},
    {"name": "Pressure", "type": "NUMERIC"},
    {"name": "Layer", "type": "STRING"},
    {"name": "Longitude", "type": "NUMERIC"},
    {"name": "Latitude", "type": "NUMERIC"}
]

In [28]:
# Create the table
create_table(project_id, dataset_id, table_id, schema)

Table pressureData created.


# Uploading data into the tables

## Define a function to upload the data

In [35]:
def load_csv_to_bigquery(csv_file_path, project_id, dataset_id, table_id, schema=None):
    # Set the environment variable to point to your service account key file
    os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "key-file.json"

    # Load CSV file into a Pandas DataFrame
    df = pd.read_csv(csv_file_path)

    # Define table reference
    table_ref = f"{dataset_id}.{table_id}"

    # Write DataFrame to BigQuery
    df.to_gbq(destination_table=table_ref,
              project_id=project_id,
              if_exists='replace',  # Replace table if it already exists
              table_schema=schema)  # Optional explicit schema

    print(f"CSV file '{csv_file_path}' successfully loaded into BigQuery table '{table_ref}'.")

### Upload the earthquakes data

In [36]:
# Define variables for the table
csv_file_path = os.path.join('..', 'earthquake-data', 'earthquakes.csv')
project_id = 'project-3-415202'
dataset_id = 'seismic_activity_and_injection_wells'
table_id = 'earthquakes'

In [38]:
# Run the function
load_csv_to_bigquery(csv_file_path, project_id, dataset_id, table_id)

  df.to_gbq(destination_table=table_ref,
100%|██████████| 1/1 [00:00<?, ?it/s]

CSV file '..\earthquake-data\earthquakes.csv' successfully loaded into BigQuery table 'seismic_activity_and_injection_wells.earthquakes'.





### Upload the injectionVolumes data

In [3]:
# Define variables for the table
csv_file_path = os.path.join('..', 'injectionVolumes-data', 'injectionVolumes.csv')
project_id = 'project-3-415202'
dataset_id = 'seismic_activity_and_injection_wells'
table_id = 'injectionVolumes'

In [5]:
# Run the function
load_csv_to_bigquery(csv_file_path, project_id, dataset_id, table_id)

  df.to_gbq(destination_table=table_ref,
100%|██████████| 1/1 [00:00<?, ?it/s]

CSV file '..\injectionVolumes-data\injectionVolumes.csv' successfully loaded into BigQuery table 'seismic_activity_and_injection_wells.injectionVolumes'.





### Upload the Pressure Data

In [6]:
# Define variables for the table
csv_file_path = os.path.join('..', 'pressure-data', 'Updated_Pressure_Data_with_LatLon.csv')
project_id = 'project-3-415202'
dataset_id = 'seismic_activity_and_injection_wells'
table_id = 'pressureData'

In [7]:
# Run the function
load_csv_to_bigquery(csv_file_path, project_id, dataset_id, table_id)

  df.to_gbq(destination_table=table_ref,
100%|██████████| 1/1 [00:00<?, ?it/s]

CSV file '..\pressure-data\Updated_Pressure_Data_with_LatLon.csv' successfully loaded into BigQuery table 'seismic_activity_and_injection_wells.pressureData'.



