# Setup: clone the repository and install Prefect

In [None]:

! git clone https://github.com/anushagj/friend-up-your-cash-app-game.git
! pip install prefect==1.0 -U

# Prefect Tutorial 

In [4]:
import prefect
from prefect import task, Flow
@task
def hello_task():
  """Emit "Hello world!" at INFO level via the logger stored in the Prefect context."""
  # The logger is looked up under the "logger" key of prefect.context.
  prefect.context.get("logger").info("Hello world!")

# Build a flow named "hello-flow" whose only task is hello_task.
flow = Flow("hello-flow", tasks=[hello_task])
# Run the flow from the notebook; because this is the last expression in the
# cell, the value returned by run() is displayed as the cell output.
flow.run()


[2023-08-18 10:00:23+0000] INFO - prefect.FlowRunner | Beginning Flow run for 'hello-flow'
[2023-08-18 10:00:23+0000] INFO - prefect.TaskRunner | Task 'hello_task': Starting task run...
[2023-08-18 10:00:23+0000] INFO - prefect.hello_task | Hello world!
[2023-08-18 10:00:23+0000] INFO - prefect.TaskRunner | Task 'hello_task': Finished task run for task with final state: 'Success'
[2023-08-18 10:00:23+0000] INFO - prefect.FlowRunner | Flow run SUCCESS: all reference tasks succeeded


<Success: "All reference tasks succeeded.">

# Setup Prefect Cloud

In [None]:
# Start a Prefect local agent. The agent registers itself and then keeps
# waiting for flow runs, so this cell stays busy until it is interrupted.
# NOTE(review): this cell appears before the `prefect auth login` cell;
# presumably login has to happen first — confirm the intended cell order.
! prefect agent local start

[2023-08-18 10:05:36,401] INFO - agent | Registering agent...
[2023-08-18 10:05:36,821] INFO - agent | Registration successful!

 ____            __           _        _                    _
|  _ \ _ __ ___ / _| ___  ___| |_     / \   __ _  ___ _ __ | |_
| |_) | '__/ _ \ |_ / _ \/ __| __|   / _ \ / _` |/ _ \ '_ \| __|
|  __/| | |  __/  _|  __/ (__| |_   / ___ \ (_| |  __/ | | | |_
|_|   |_|  \___|_|  \___|\___|\__| /_/   \_\__, |\___|_| |_|\__|
                                           |___/

[2023-08-18 10:05:37,228] INFO - agent | Starting LocalAgent with labels ['1138c363a735']
[2023-08-18 10:05:37,228] INFO - agent | Agent documentation can be found at https://docs.prefect.io/orchestration/
[2023-08-18 10:05:37,228] INFO - agent | Waiting for flow runs...
[2023-08-18 10:08:57,116] INFO - agent | Deploying flow run 1458fe29-bf9c-46f7-8dc3-c1f07f10c18d to execution environment...
[2023-08-18 10:08:57,492] INFO - agent | Completed deployment of flow run 1458fe29-bf9c-46f7-8dc3-c1f07f

In [10]:
! prefect auth login --key {PASTE_API_KEY}


Logged in to Prefect Cloud tenant "julietest96@gmail.com's Account" (julietest96-gmail-com-s-account)


In [None]:
# Create the Prefect project "cash_find_friends"; the flow.register() call
# further down registers the flow under this project name.
! prefect create project cash_find_friends

In [11]:
# Register the flow built above under the "cash_find_friends" project.
# The call prints the Flow URL, ID, project and labels, and returns the ID
# string, which is shown as the cell output.
flow.register(project_name="cash_find_friends")

Flow URL: https://cloud.prefect.io/julietest96-gmail-com-s-account/flow/33d8a45b-a607-45b5-8f9e-0791edc8f94a
 └── ID: e386800a-6b05-41a9-a4fe-6faaec3438fc
 └── Project: cash_find_friends
 └── Labels: ['1138c363a735']


'e386800a-6b05-41a9-a4fe-6faaec3438fc'

In [None]:
Next, follow the Flow URL that was generated above and select "Quick Run"; the flow run will then appear in Prefect Cloud.

In [None]:
# Start the Prefect local agent so it can pick up flow runs for the
# registered flow.
# NOTE(review): this is the same command as the earlier agent cell —
# presumably only one of the two needs to be running; confirm.
! prefect agent local start

In [None]:
import parquet
import os
from google.cloud import bigquery
from prefect import task, Flow, Parameter

#TO BE UPDATED BY YOU
# BigQuery destination: project ID, dataset name and table name. These are
# passed to the table-creation and upload calls further down.
PROJECT_ID = "ghc23-394604"
DATASET_NAME = "Friends"
TABLE_NAME = "cash_friends3"

#TO BE UPDATED BY YOU
# Path to a JSON key file, exported through the GOOGLE_APPLICATION_CREDENTIALS
# environment variable — presumably read by bigquery.Client() to authenticate.
# Point this at your own key file and keep that file out of version control.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/content/ghc23/ghc23-394604-07e797cf0921.json"


# Function to create a new table in BigQuery
def create_table(project_id, dataset_name, table_name):
  """Create the friends table in BigQuery, tolerating a table that already exists.

  Args:
    project_id: Google Cloud project ID; used both for the client and as the
      project of the dataset reference.
    dataset_name: Name of the dataset that will hold the table. The dataset
      itself is not created here.
    table_name: Name of the table to create inside that dataset.

  Errors raised by the BigQuery client propagate to the caller unchanged.
  """
  client = bigquery.Client(project=project_id)

  # Define the schema for your table (change the fields accordingly)
  schema = [
    bigquery.SchemaField("user_id", "STRING"),
    bigquery.SchemaField("age_on_cash_app", "INTEGER"),
    bigquery.SchemaField("gender", "STRING"),
    bigquery.SchemaField("transaction_count", "INTEGER"),
    bigquery.SchemaField("total_amount_ever_spent", "INTEGER"),
    bigquery.SchemaField("current_amount", "INTEGER"),
    bigquery.SchemaField("bitcoin_holdings", "FLOAT"),
    bigquery.SchemaField("stock_holdings", "INTEGER"),
    bigquery.SchemaField("cash_card_usage", "STRING"),
    bigquery.SchemaField("direct_deposit", "STRING"),
    bigquery.SchemaField("cash_boost_used", "STRING"),
    bigquery.SchemaField("account_creation_date", "STRING"),
    bigquery.SchemaField("cashtag", "STRING"),
    bigquery.SchemaField("most_interacted_user_id", "STRING"),
    bigquery.SchemaField("occupation", "STRING"),
    bigquery.SchemaField("location", "STRING"),
    bigquery.SchemaField("most_used_cash_app_feature", "STRING")
  ]

  # Client.dataset() is deprecated; build the table reference explicitly.
  table_ref = bigquery.DatasetReference(project_id, dataset_name).table(table_name)
  table = bigquery.Table(table_ref, schema=schema)

  # exists_ok=True lets this cell be re-run without failing when the table
  # is already there (the existing table is returned instead).
  table = client.create_table(table, exists_ok=True)
  print(f"Table {table.project}.{table.dataset_id}.{table.table_id} created (or already exists).")


# Function to upload Parquet data to BigQuery table
def upload_parquet_to_bigquery(parquet_file_path, project_id, dataset_name, table_name):
  """Read all rows from a Parquet file and insert them into a BigQuery table.

  Args:
    parquet_file_path: Path of the Parquet file to read.
    project_id: Google Cloud project ID; used for the client and the table
      reference.
    dataset_name: Name of the dataset containing the target table.
    table_name: Name of the target table.

  Raises:
    ValueError: If insert_rows_json() reports any errors; the reported
      errors are included in the message.
  """
  client = bigquery.Client(project=project_id)
  # Client.dataset() is deprecated; build the table reference explicitly.
  table_ref = bigquery.DatasetReference(project_id, dataset_name).table(table_name)

  # Parquet is a binary format, so the file has to be opened in "rb" mode;
  # text mode would try to decode the raw bytes as characters.
  with open(parquet_file_path, "rb") as f:
    # Materialize the rows while the file is still open.
    # NOTE(review): presumably DictReader yields one dict per row keyed by
    # column name, and the values must be JSON-serializable — confirm.
    rows_to_insert = list(parquet.DictReader(f))

  errors = client.insert_rows_json(table_ref, rows_to_insert)
  if errors:
    raise ValueError(f"Error uploading data to BigQuery: {errors}")

In [None]:
# Path to your parquet file
parquet_file_path = "CashFriends.parquet"

# Create the table (only needed if the table doesn't already exist)
create_table(PROJECT_ID, DATASET_NAME, TABLE_NAME)

# Upload the Parquet data to the table. The helper defined above is
# upload_parquet_to_bigquery and the path variable is parquet_file_path;
# the previous upload_csv_to_bigquery / csv_file_path names do not exist.
upload_parquet_to_bigquery(parquet_file_path, PROJECT_ID, DATASET_NAME, TABLE_NAME)

# New Section
