# Automated Updates and Calculations in 'DeFi Landscape LP Opportunities'

## 1. Setup

### Install and import all relevant libraries

In [68]:
%%capture
!pip install gspread
!pip install pandas
!pip install gspread-dataframe
!pip install oauth2client
!pip install PyDrive

In [69]:
# Import all relevant libraries
import pandas as pd
import json
from datetime import datetime as dt
import csv
import math
import requests
import gspread
import gspread_dataframe as gd
from oauth2client.service_account import ServiceAccountCredentials
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from collections import defaultdict

### Connect to Google Sheets API and setup Defillama API

In [70]:
# Set up Google Sheets API credentials
scope = ['https://spreadsheets.google.com/feeds', 'https://www.googleapis.com/auth/drive']
credentials = ServiceAccountCredentials.from_json_keyfile_name('lp-landscape-analysis-dd6d6479b244.json', scope)
gc = gspread.authorize(credentials)

In [71]:
# API TVL base URL
tvl_base_url = 'https://api.llama.fi'

# API Yields base URL
yields_base_url = 'https://yields.llama.fi'

## 2. Update Data in 'DeFi Landscape LP Opportunities'

### Helper functions

In [72]:
# HELPER FUNCTIONS

# Writes a list of values to a column of the input sheet starting at starting_cell
def write_to_column(sheet, starting_cell, values):
  sheet.update(starting_cell, [[i] for i in values], value_input_option="USER_ENTERED")
  print("Values written to sheet successfully")

# Converts a dataframe column from str to date format in-place
def df_str_to_date(df, column_name, date_format):
  df[column_name] = pd.to_datetime(df[column_name], format=date_format, errors='raise')
  df[column_name] = df[column_name].dt.date

### Open spreadsheet and load relevant tabs

In [73]:
# Open the Google Sheet we'll be reading and writing to
lp_landscape = gc.open('DeFi Landscape LP Opportunities_v2')

In [74]:
# Select the 'Project Ratings' tab and read the data into a dataframe
project_ratings_sheet = lp_landscape.worksheet("Project Ratings")
lp_project_ratings = project_ratings_sheet.get_all_records()
lp_project_ratings_df = pd.DataFrame(lp_project_ratings)

# Select the 'Stables' tab and read the data into a dataframe
stables_sheet = lp_landscape.worksheet("Stables")
lp_stables = stables_sheet.get_all_records()
lp_stables_df = pd.DataFrame(lp_stables)

# Select 'Pool Yields' tab
pool_yields_sheet = lp_landscape.worksheet("Pool Yields")

# Select 'Protocol Historicals' tab
protocol_historicals_sheet = lp_landscape.worksheet("Protocol Historicals")

# Select 'Pool Historicals' tab
pool_historicals_sheet = lp_landscape.worksheet("Pool Historicals")

# Select 'Protocol Info' tab
protocol_info_sheet = lp_landscape.worksheet("Protocol Info")

# Select 'Historical Chain TVL' tab
historical_chain_tvl_sheet = lp_landscape.worksheet("Historical Chain TVL")

### 2.1. Update data in 'Project Ratings' tab

In [75]:
# Extract protocol names from the worksheet as a list in the format used by Defillama API (slug)
protocol_slugs = lp_project_ratings_df["API Protocol Name"].tolist()

protocol_slugs = [protocol for protocol in protocol_slugs if protocol]


In [76]:
# Write output to CSV file 
protocols_filename = 'protocol_slugs.csv'

# Open the file in write mode
with open(protocols_filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Writing each item in the list to the file
    for slug in protocol_slugs:
        writer.writerow([slug])

#### 2.1.1. Update current protocol TVLs

In [77]:
# Get current TVLs for the list of protocols and return them as a list
protocol_tvls = [requests.get(tvl_base_url + '/tvl/' + protocol).json() for protocol in protocol_slugs]

# Format numbers as millions before writing to spreadsheet
protocol_tvls_m = [int(tvl) / 1000000 for tvl in protocol_tvls]

In [78]:
# Write results stored in protocol_tvls to 'Current TVL ($m)' in 'Project Ratings' tab to update current protocol TVLs
write_to_column(project_ratings_sheet, "C2", protocol_tvls_m)

  sheet.update(starting_cell, [[i] for i in values], value_input_option="USER_ENTERED")


Values written to sheet successfully


#### 2.1.2. Update 1-year average protocol TVLs

In [79]:
# Pull historical TVL data for each protocol in the list broken down by token and chain
protocol_historicals = [requests.get(tvl_base_url + '/protocol/' + protocol).json() for protocol in protocol_slugs]

# List of keys we want to keep
fields = ['name','currentChainTvls']

##### Sanity check to compare aggregated TVL to actual current TVL shown by Defillama

In [80]:
# Create a list of dictionaries with protocol name as the key as current TVLs
# by chain as the value for every protocol in the list

# List to store broken down results
protocol_tvls_by_chain = []

# List to store aggreagated TVL results
aggregated_protocol_tvls = []

# Iterate protocol list
for protocol in protocol_historicals:
  # keep only relevant keys
  p = dict((k, protocol[k]) for k in fields if k in protocol)

  # Create key,value pair from the values associated with the two keys left
  p = {protocol['name']: protocol['currentChainTvls']}

  # Iterate nested dict and remove unwanted chains to prevent double counting TVL
  for k,v in p.items():
    for x in list(v.keys()):
      if "borrowed" in x or "staking" in x or 'pool2' in x:
        del v[x]
    aggregated = (k, sum(v.values()))

  # Add protocol to result list
  protocol_tvls_by_chain.append(p)

  # Add protocol overall current TVL to aggregated list
  aggregated_protocol_tvls.append(aggregated)

In [81]:
# COMPARE AGGREGATED VS. ACTUAL TVLs

current_aggregated_protocol_tvls = [x[1] for x in aggregated_protocol_tvls]
protocol_names = [x[0] for x in aggregated_protocol_tvls]

# Create tuples for each protocol comparing aggregated TVL to TVL shown by DL
protocol_tvl_deltas = list(zip(protocol_names, zip(current_aggregated_protocol_tvls, protocol_tvls)))

In [82]:
protocol_tvl_deltas

[('AAVE', (6169791107.24858, 6169791107.248594)),
 ('Curve Finance', (1945417529.10718, 1939004175.7370179)),
 ('Compound', (1150882528.97724, 1145397246.3872075)),
 ('Convex Finance', (1999411054.82827, 1999411054.828274)),
 ('Reserve', (27295026.85789, 27295026.85788813)),
 ('MakerDAO', (5790214660.16232, 5597384507.413509)),
 ('Spark', (1439376104.36094, 1423021289.9270036)),
 ('Yearn Finance', (350811777.80176, 350811777.8017609)),
 ('Frax Finance', (956742663.25313, 956742663.253129)),
 ('Goldfinch', (1117831.27696, 1117831.2769619313)),
 ('Balancer', (910754091.4576501, 910754091.4576555)),
 ('Flux Finance', (29111812.15055, 29111812.150552373)),
 ('Summer.fi', (2865308489.6061, 2865210937.4579725)),
 ('Stargate', (322015705.25694, 322015705.2569519)),
 ('Aura', (387027282.93618, 387027282.9361881)),
 ('Abracadabra', (144636445.03667998, 144636445.0366732)),
 ('Velodrome', (126764999.59662, 126764999.59661971)),
 ('Idle', (32001115.472319998, 32001115.472324044)),
 ('Gains Networ

##### Aggregate historical TVL broken down by token and chain to get overall historical protocol TVLs

In [83]:
# Create a list of dictionaries with protocol name as the key and historical TVL
# by token and chain as the value for every protocol in the list

# List to store broken down results
clean_protocol_historicals = []

# Iterate protocol list
for protocol in protocol_historicals:
  # Keep only relevant keys
  p = dict((k, protocol[k]) for k in fields if k in protocol)

  # Create key,value pair from the values associated with the two keys left
  p = {protocol['name']: protocol['chainTvls']}

  # Iterate nested dict and remove unwanted chains to prevent double counting TVL
  for k,v in p.items():
    for x in list(v.keys()):
      if "borrowed" in x or "staking" in x or 'pool2' in x:
        del v[x]

  # Add protocol to result list
  clean_protocol_historicals.append(p)

In [84]:
# List to store the final result
historical_protocol_tvls = []

# Aggregate TVL data per date for each chain
for protocol in clean_protocol_historicals:

  # Dictionary to store result
  aggregated_historicals = defaultdict(float)

  for p, d1 in protocol.items():
    for chain, chain_data in d1.items():
        for entry in chain_data["tvl"]:
            date = entry["date"]
            totalLiquidityUSD = float(entry["totalLiquidityUSD"])
            aggregated_historicals[date] += totalLiquidityUSD

  # Convert the aggregated data to a sorted list of tuples
  sorted_aggregated_historicals = sorted(aggregated_historicals.items(), key=lambda x: x[0])

  # Convert the aggregated data to a dataframe
  aggregated_historicals_df = pd.DataFrame(sorted_aggregated_historicals, columns=['date', 'totalLiquidityUSD'])

  # Convert the 'Date' column from UNIX timestamp to datetime format
  aggregated_historicals_df['date'] = pd.to_datetime(aggregated_historicals_df['date'], unit='s')
  aggregated_historicals_df['date'] = aggregated_historicals_df['date'].dt.date

  # Add dataframe to result list
  historical_protocol_tvls.append(aggregated_historicals_df)

In [85]:
# Order by date in reverse chronological order
historical_protocol_tvls = [df.sort_values(by='date', ascending=False) for df in historical_protocol_tvls]

# Create a common index for all dataframes to conserve the order when concatenating
for df in historical_protocol_tvls:
    df.index = range(len(df))

# Concatenate every dataframe in the list into one dataframe
protocol_historicals_df = pd.concat(historical_protocol_tvls, axis = 1, sort=False)

In [86]:
# Insert 1 blank cell between each protocol slug in the list for spreadsheet formatting
formatted_protocol_slugs = []

for slug in protocol_slugs:
  formatted_protocol_slugs.append(slug)
  formatted_protocol_slugs.append("")
formatted_protocol_slugs.pop()

# Clear every cell in the sheet
protocol_historicals_sheet.clear()

# Write formatted list as first row of the 'Protocol Historicals' sheet for indexing
protocol_historicals_sheet.update("A1", [formatted_protocol_slugs])

# Update data in 'Protocol Historicals' sheet under first row
gd.set_with_dataframe(protocol_historicals_sheet, protocol_historicals_df, row=2,
                      include_index=False, include_column_header=True, resize=True)

  protocol_historicals_sheet.update("A1", [formatted_protocol_slugs])


### 2.2 Update 'Pool Yields' raw data tab

In [87]:
# Get all pool yields
yields = requests.get(yields_base_url + '/pools')

# Convert Yields response to data frame
pool_yields_df = pd.DataFrame(yields.json()['data'])

In [88]:
# Clear every cell in the sheet
pool_yields_sheet.clear()

# Update data in 'Pool Yields' sheet
gd.set_with_dataframe(pool_yields_sheet, pool_yields_df, include_index=False,
                      include_column_header=True, resize=True)

### 2.3 Update 'Pool Historicals' raw data tab

In [89]:
# Extract pool ID's from the worksheet as a list
pool_ids = lp_stables_df["API pool id"].tolist()
pool_ids = [id for id in pool_ids if id]

In [90]:
# Write output to CSV file 
pool_ids_filename = 'pool_ids.csv'

# Open the file in write mode
with open(pool_ids_filename, mode='w', newline='') as file:
    writer = csv.writer(file)
    
    # Writing each item in the list to the file
    for id in pool_ids:
        writer.writerow([id])

In [91]:
# Get historical TVL and APY data for each pool and return it as a list of data frames
pool_historicals = [requests.get(yields_base_url + '/chart/' + id).json()['data'] for id in pool_ids]
pool_historicals_dfs = [pd.DataFrame(i).drop(['il7d', 'apyBase7d'], axis=1) for i in pool_historicals]

# Convert timestamp format from str to date in every data frame
date_format = '%Y-%m-%dT%H:%M:%S.%fZ'

for df in pool_historicals_dfs:
  df_str_to_date(df, 'timestamp', date_format)

# Order by date in reverse chronological order
pool_historicals_dfs = [df.sort_values(by='timestamp', ascending=False) for df in pool_historicals_dfs]

In [92]:
# Create a common index for all dataframes to conserve the order when concatenating
for df in pool_historicals_dfs:
    df.index = range(len(df))

# Concatenate every data frame in the list into one data frame
pool_historicals_df = pd.concat(pool_historicals_dfs, axis = 1, sort=False)

# Insert 4 blank elements between each pool id in the list for spreadsheet formatting
formatted_pool_ids = []

for id in pool_ids:
  formatted_pool_ids.append(id)
  for i in range(4):
    formatted_pool_ids.append("")
formatted_pool_ids.pop()

''

In [93]:
# Clear every cell in the sheet
pool_historicals_sheet.clear()

# Write formatted list as first row of the 'Pool Historicals' sheet for indexing
pool_historicals_sheet.update("A1", [formatted_pool_ids])

# Update data in 'Pool Historicals' sheet under first row
gd.set_with_dataframe(pool_historicals_sheet, pool_historicals_df, row=2,
                      include_index=False, include_column_header=True, resize=True)

  pool_historicals_sheet.update("A1", [formatted_pool_ids])


### 2.4. Update 'Historical Chain TVL' raw data tab

In [107]:
# Get historical TVL data for Ethereum
eth_tvl = requests.get(tvl_base_url + '/v2/historicalChainTvl/Ethereum')

# Convert response to data frame
eth_tvl_df = pd.DataFrame(eth_tvl.json())

# Convert the 'Date' column from UNIX timestamp to datetime format
eth_tvl_df['date'] = pd.to_datetime(eth_tvl_df['date'], unit='s')
eth_tvl_df['date'] = eth_tvl_df['date'].dt.date

# Order by date in reverse chronological order
eth_tvl_df = eth_tvl_df.sort_values(by='date', ascending=False)

In [108]:
# Clear every cell in the sheet
historical_chain_tvl_sheet.clear()

# Update data in 'Historical Chain TVL' sheet
gd.set_with_dataframe(historical_chain_tvl_sheet, eth_tvl_df, include_index=False,
                      include_column_header=True, resize=True)

### 2.5. Update 'Protocol Info' raw data tab

In [109]:
# Get all protocols
protocols = requests.get(tvl_base_url + '/protocols')

# Convert Protocols response to data frame
protocols_df = pd.DataFrame(protocols.json())

In [110]:
# Clear every cell in the sheet
protocol_info_sheet.clear()

# Update data in 'Pool Yields' sheet
gd.set_with_dataframe(protocol_info_sheet, protocols_df, include_index=False,
                      include_column_header=True, resize=True)

### 2.6. Update data in 'LP Update Historicals' tab for LP Landscape Update distribution charts

In [111]:
# Create dictionary to associate all pool ids to their historical data
all_pools_dict = dict(zip(pool_ids, pool_historicals_dfs))

In [112]:
# Select the 'eUSD Curve Comps' tab and read the data into a dataframe
eusd_curve_sheet = lp_landscape.worksheet("eUSD Curve Comps")
lp_eusd_curve = eusd_curve_sheet.get_all_records()
lp_eusd_curve_df = pd.DataFrame(lp_eusd_curve)

# Select 'LP Update Historicals' tab
lp_update_historicals_sheet = lp_landscape.worksheet("LP Update Historicals")

In [113]:
# Extract pool names from the worksheet as a list
eusd_peer_pool_ids = lp_eusd_curve_df["API pool id"].tolist()
eusd_peer_pool_ids = [id for id in eusd_peer_pool_ids if id]


# MIM-3CRV pool id
mim_3crv_pool_id = "8a20c472-142c-4442-b724-40f2183c073e"

# Remove MIM-3CRV pool from list if present
if mim_3crv_pool_id in eusd_peer_pool_ids: eusd_peer_pool_ids.remove(mim_3crv_pool_id)

# Keep only data for pools contained in pool id list
eusd_curve_peer_pools = {k: all_pools_dict[k] for k in eusd_peer_pool_ids}

In [114]:
# Separate values (dataframes) from keys (pool id's) before concatenating
eusd_curve_peer_pools_dfs = list(eusd_curve_peer_pools.values())

# Concatenate every dataframe in the list into one dataframe
eusd_curve_peer_pools_df = pd.concat(eusd_curve_peer_pools_dfs, axis = 1, sort=False)

In [115]:
# Insert 4 blank elements between each pool id in the list for spreadsheet formatting
formatted_peer_pool_ids = []

for id in eusd_curve_peer_pools:
  formatted_peer_pool_ids.append(id)
  for i in range(4):
    formatted_peer_pool_ids.append("")
formatted_peer_pool_ids.pop()

''

In [116]:
# Clear every cell in the sheet
lp_update_historicals_sheet.clear()

# Write formatted list as first row of the 'Pool Historicals' sheet for indexing
lp_update_historicals_sheet.update("A1", [formatted_peer_pool_ids])

# Update data in 'LP Update Historicals' sheet under first row
gd.set_with_dataframe(lp_update_historicals_sheet, eusd_curve_peer_pools_df, row=2,
                      include_index=False, include_column_header=True, resize=True)

  lp_update_historicals_sheet.update("A1", [formatted_peer_pool_ids])


### 2.7. Aggregate historical data to create indices for RToken peer groups

In [118]:
# Select the 'hyUSD Comps' tab and read the data into a dataframe
hyusd_sheet = lp_landscape.worksheet("hyUSD Comps")
lp_hyusd = hyusd_sheet.get_all_records()
lp_hyusd_df = pd.DataFrame(lp_hyusd)

In [119]:
# RToken pool id's
eusd_pool_id = "381b00d5-b4f8-489c-95cb-40018c72bdd3"
hyusd_pool_id = "3378bced-4bde-4ccf-b742-7d5c8ebb7720"

In [120]:
# HELPER FUNCTIONS

# Aggregate historicals for peer pools, store mean values, and return as dataframe
def aggregate_historicals(all_pools_dict, pool_id_list):
  # Keep only data for pools contained in pool_id_list
  peer_pools = {k: all_pools_dict[k] for k in pool_id_list}

  # Separate values (dataframes) from keys (pool id's) before concatenating
  peer_pools_dfs = list(peer_pools.values())

  # Concatenate all dataframes
  peer_pools_df = pd.concat(peer_pools_dfs)

  # Group by timestamp and compute the mean for each group
  aggregated_df = peer_pools_df.groupby('timestamp').mean().reset_index()

  # Order by date in reverse chronological order
  aggregated_df = aggregated_df.sort_values(by='timestamp', ascending=False)

  return aggregated_df

#### hyUSD Comps

In [122]:
# Extract pool names from the worksheet as a list
hyusd_peer_pool_ids = lp_hyusd_df["API pool id"].tolist()
hyusd_peer_pool_ids = [id for id in hyusd_peer_pool_ids if id]

# Remove hyUSD from list
if hyusd_pool_id in hyusd_peer_pool_ids: hyusd_peer_pool_ids.remove(hyusd_pool_id)

# Aggregate historicals for peer pools and store mean values as dataframe
hyusd_peers_df = aggregate_historicals(all_pools_dict, hyusd_peer_pool_ids)

  peer_pools_df = pd.concat(peer_pools_dfs)
