In [1]:
import toml
import json
import millify
import gspread
import requests
import calendar
import numpy as np
import pandas as pd
import polars as pl
import datetime as dt
import streamlit as st
from millify import prettify
import chart_functions as chart
from lxml.html import fromstring
from streamlit_gsheets import GSheetsConnection
from google.oauth2.service_account import Credentials
from oauth2client.service_account import ServiceAccountCredentials

In [168]:
# Column name -> dtype mapping passed as `schema=` to pl.DataFrame in load_data().
# The frame is built with orient='row', so the key order here is expected to
# follow the worksheet's column order (compare the header row printed below).
# Values arrive from the sheet as strings; load_data() builds the frame with
# strict=False so they are converted to these dtypes.
schema = {
    'Number': pl.Int64,
    'ISBN': str,  # kept as text, not a number
    'Month': str,
    'Year': pl.Int64,
    'Title': str,
    'Score': pl.Float64,
    'Author': str,
    'Publisher': str,
    'Pages': pl.Int64,
    'Author gender': str,
    'Pub year': pl.Int64,
    'Goodreads score': pl.Float64,
    'Our score conversion': pl.Float64,
    'variance': pl.Float64,
    'Debut?': str,
    'Translated?': str,
    'Topics': str
}

In [169]:
def load_environment_variables(secret_path):
    """Parse the TOML file at ``secret_path`` and return the result.

    Args:
        secret_path: Path handed straight to ``toml.load``.

    Returns:
        Whatever ``toml.load`` produces for that file.
    """
    parsed_secrets = toml.load(secret_path)
    return parsed_secrets

def authenticate(secrets, scope, workbook_name):
    """Authorize a gspread client with service-account credentials and open a workbook.

    Args:
        secrets: Parsed secrets mapping; the service-account key fields are
            read from ``secrets['connections']['gsheets']``.
        scope: OAuth scopes forwarded to
            ``ServiceAccountCredentials.from_json_keyfile_dict``.
        workbook_name: Name passed to ``client.open``.

    Returns:
        The workbook object returned by ``client.open(workbook_name)``.

    Raises:
        KeyError: If ``secrets`` lacks the ``connections`` / ``gsheets`` keys.
    """
    # Use the `secrets` argument instead of the module-level `environment`
    # so the function does not depend on hidden notebook state.
    # The section is already a mapping, so a shallow copy is sufficient; the
    # previous str() -> quote-replace -> json.loads round-trip failed on any
    # value containing an apostrophe or a boolean.
    keyfile_dict = dict(secrets['connections']['gsheets'])
    credentials = ServiceAccountCredentials.from_json_keyfile_dict(keyfile_dict, scopes=scope)
    client = gspread.authorize(credentials)
    return client.open(workbook_name)


# Module-level setup: parse the Streamlit secrets file, read the OAuth scopes
# from its [scopes] table, and open the 'NLFB' workbook.
# WORKBOOK is read as a global by load_data().
environment = load_environment_variables('.streamlit/secrets.toml')
scope = environment['scopes']['scope']
WORKBOOK = authenticate(environment, scope, 'NLFB')

In [201]:
def pad_data(data, length):
    """Normalise sheet rows: blank cells become ``None`` and short rows are padded.

    Args:
        data: Iterable of rows, each row a sequence of cell values.
        length: Target number of cells per row.

    Returns:
        A new list of new row lists. Every ``""`` cell is replaced by ``None``
        and ``None`` is appended until the row has ``length`` cells. Rows that
        already have ``length`` or more cells are not truncated.
    """
    padded_rows = []
    for row in data:
        cleaned = [None if cell == "" else cell for cell in row]
        # A non-positive count multiplies to an empty list, so long rows
        # receive no padding.
        cleaned.extend([None] * (length - len(row)))
        padded_rows.append(cleaned)
    return padded_rows

In [202]:
def load_data():
    """Read the 'Main' worksheet of ``WORKBOOK`` into a polars DataFrame.

    The first fetched row is used only to determine the column count; the
    remaining rows are passed through ``pad_data`` (blank cells -> ``None``,
    short rows padded to the header width) and loaded row-wise with the
    module-level ``schema``.

    Returns:
        A ``pl.DataFrame`` built with ``schema=schema`` and ``strict=False``.

    Raises:
        IndexError: If the worksheet returns no rows at all.
    """
    rows = WORKBOOK.worksheet('Main').get()
    header_row, body_rows = rows[0], rows[1:]
    return pl.DataFrame(
        pad_data(body_rows, len(header_row)),
        schema=schema,
        orient='row',
        strict=False,
    )
    

''

In [203]:
# Fetch the sheet into `df`.
# NOTE(review): the printed lines shown under this cell (header list, row
# lengths, raw/padded row) are not produced by the current load_data() or
# pad_data(), which contain no print calls -- the saved output looks stale;
# re-run from a fresh kernel to refresh it.
df = load_data()

['Number', 'ISBN', 'Month', 'Year', 'Title', 'Score', 'Author', 'Publisher', 'Pages', 'Author gender', 'Pub year', 'Goodreads score', 'Our score conversion', 'variance', 'Debut?', 'Translated?', 'Topics']
17
16
['34', '9780571376483', 'June', '2024', 'Demon Copperhead', '8.192', 'Barbara Kingsolver', 'Faber & Faber', '560', 'Female', '2023', '', '4.096', '4.096', 'no', 'no']
17
['34', '9780571376483', 'June', '2024', 'Demon Copperhead', '8.192', 'Barbara Kingsolver', 'Faber & Faber', '560', 'Female', '2023', None, '4.096', '4.096', 'no', 'no', None]


In [None]:
# Preview the first rows; the dtype row in the output shows the schema was applied.
df.head()

Number,ISBN,Month,Year,Title,Score,Author,Publisher,Pages,Author gender,Pub year,Goodreads score,Our score conversion,variance,Debut?,Translated?,Topics
i64,str,str,i64,str,f64,str,str,i64,str,i64,f64,f64,f64,str,str,str
34,"""9780571376483""","""June""",2024,"""Demon Copperhead""",8.192,"""Barbara Kingsolver""","""Faber & Faber""",560,"""Female""",2023,,4.096,4.096,"""no""","""no""",
14,"""9781784744649""","""November""",2022,"""Tomorrow, and Tomorrow, and To…",7.84,"""Gabrielle Zevin""","""Vintage Publishing""",416,"""Female""",2022,4.22,3.92,-0.3,"""no""","""no""",
31,"""9781838930509""","""March""",2024,"""Pachinko""",7.7,"""Min Jin Lee""","""Bloomsbury Publishing PLC""",560,"""Female""",2017,,3.85,3.85,"""no""","""no""","""Romance, Crime, War, Music, Pe…"
22,"""9780008532772""","""July""",2023,"""Yellowface""",7.66,"""Rebecca F. Kuang""","""HarperCollins Publishers""",336,"""Female""",2023,4.04,3.83,-0.21,"""no""","""no""",
30,"""9781529111798""","""February""",2024,"""I who have never known men""",7.51,"""Jacqueline Harpman""","""Vintage Publishing""",208,"""Female""",1997,,3.755,3.755,"""no""","""yes""",
