In [28]:
import os
import sys
import toml
import json
import millify
import gspread
import requests
import calendar
import plotly.express as px
import numpy as np
import pandas as pd
import polars as pl
import datetime as dt
import streamlit as st
from pathlib import Path
from millify import prettify
from lxml.html import fromstring
from streamlit_gsheets import GSheetsConnection
from google.oauth2.service_account import Credentials
from oauth2client.service_account import ServiceAccountCredentials
import scipy

# Put the parent of the current working directory first on the import path
# so the `src` package imported below can be resolved.
sys.path.insert(0, str(Path.cwd().parent))

from src import utils, schemas, chart_functions as chart



In [21]:
def load_environment_variables(secret_path):
    """Parse the TOML file at ``secret_path`` and return its contents.

    Args:
        secret_path: Location of the TOML file; passed unchanged to
            ``toml.load``.

    Returns:
        Whatever ``toml.load`` produces for that file.
    """
    parsed_secrets = toml.load(secret_path)
    return parsed_secrets

def authenticate(secrets, scope, workbook_name):
    """Authorize a gspread client with service-account credentials and open a workbook.

    Args:
        secrets: Mapping of loaded secrets. The service-account key fields are
            read from ``secrets['connections']['gsheets']``.
        scope: Scope value(s) forwarded as ``scopes`` when building the
            credentials.
        workbook_name: Name passed to ``client.open`` to select the workbook.

    Returns:
        The object returned by ``client.open(workbook_name)``.
    """
    # Use the ``secrets`` argument rather than a module-level variable, so the
    # function works with whatever mapping the caller supplies.
    gsheets_section = secrets['connections']['gsheets']

    # Round-trip the section through its string form to obtain a plain dict:
    # single quotes are swapped for double quotes so the text parses as JSON,
    # and raw CRLF pairs are replaced by their escaped form.
    # NOTE(review): this conversion fails if a value contains an apostrophe or
    # is not a string (e.g. a bool); confirm the secrets never contain those.
    credentials_json = str(gsheets_section).replace("'", '"').replace('\r\n', '\\r\\n')
    credentials_file = json.loads(credentials_json)

    credentials = ServiceAccountCredentials.from_json_keyfile_dict(credentials_file, scopes=scope)
    client = gspread.authorize(credentials)
    wb = client.open(workbook_name)
    return wb

def pad_data(data, length):
    """Normalize rows: blank strings become ``None`` and short rows are padded.

    Args:
        data: Iterable of rows, each row a list of cell values.
        length: Target row width. Rows shorter than this are right-padded
            with ``None``; rows that are already this long or longer are not
            truncated.

    Returns:
        A new list of new row lists; the input rows are not modified.
    """
    padded_data = []
    for row in data:
        cleaned_row = [None if cell == "" else cell for cell in row]
        # A non-positive shortfall multiplies to an empty list, so rows that
        # are already wide enough are left at their own width.
        shortfall = length - len(row)
        cleaned_row.extend([None] * shortfall)
        padded_data.append(cleaned_row)
    return padded_data

def load_data(sheet_name, schema, workbook=None):
    """Read one worksheet into a polars DataFrame.

    The first row returned by the sheet is treated as the header row and is
    used only to determine the row width; column names and types come from
    ``schema``.

    Args:
        sheet_name: Name passed to ``workbook.worksheet``.
        schema: Schema forwarded to ``pl.DataFrame``.
        workbook: Object exposing ``worksheet(name)``. When omitted, the
            module-level ``WORKBOOK`` is used, which keeps existing
            two-argument calls working.

    Returns:
        A ``pl.DataFrame`` built from the rows below the header. If the sheet
        returns no rows at all, an empty frame with ``schema`` is returned.
    """
    if workbook is None:
        # Fall back to the shared handle for callers that rely on the global.
        workbook = WORKBOOK
    sheet = workbook.worksheet(sheet_name)
    data = sheet.get()
    if not data:
        # No header row to index; avoid an IndexError on ``data[0]``.
        return pl.DataFrame(schema=schema)
    headers = data[0]
    padded_data = pad_data(data[1:], len(headers))
    loaded_dataframe = pl.DataFrame(padded_data, schema=schema, orient='row', strict=False)
    return loaded_dataframe

# Load the secrets file, read the scope setting from it, and open the 'NLFB'
# workbook.
environment = load_environment_variables(secret_path='../.streamlit/secrets.toml')
scope = environment['scopes']['scope']
WORKBOOK = authenticate(secrets=environment, scope=scope, workbook_name='NLFB')

# Read the 'Main' worksheet through the project helper in `src.utils`.
main_df = utils.load_data(
    'Main',
    schema=schemas.main_schema,
    workbook=WORKBOOK,
)

In [22]:
# Preview the first five rows of the loaded sheet.
main_df.head(5)

Number,ISBN,Month,Year,Title,Score,Author,Publisher,Pages,Author gender,Pub year,Goodreads score,Our score conversion,variance,Debut?,Translated?,Topics
i64,str,str,i64,str,f64,str,str,i64,str,i64,f64,f64,f64,str,str,str
34,"""9780571376483""","""June""",2024,"""Demon Copperhead""",8.192,"""Barbara Kingsolver""","""Faber & Faber""",560,"""Female""",2023,4.5,4.096,-0.404,"""no""","""no""","""Historical, Contemporary, Comi…"
14,"""9781784744649""","""November""",2022,"""Tomorrow, and Tomorrow, and To…",7.84,"""Gabrielle Zevin""","""Vintage Publishing""",416,"""Female""",2022,4.22,3.92,-0.3,"""no""","""no""","""Contemporary, Romance, Coming …"
31,"""9781838930509""","""March""",2024,"""Pachinko""",7.7,"""Min Jin Lee""","""Bloomsbury Publishing PLC""",560,"""Female""",2017,4.33,3.85,-0.48,"""no""","""no""","""Asia, Japan, Historical"""
22,"""9780008532772""","""July""",2023,"""Yellowface""",7.66,"""Rebecca F. Kuang""","""HarperCollins Publishers""",336,"""Female""",2023,4.04,3.83,-0.21,"""no""","""no""","""Thriller, Mystery"""
30,"""9781529111798""","""February""",2024,"""I who have never known men""",7.51,"""Jacqueline Harpman""","""Vintage Publishing""",208,"""Female""",1997,4.22,3.755,-0.465,"""no""","""yes""","""Sci-fi, Dystopian, Feminism, F…"


In [26]:
# Preview the first five rows of the exploded frame.
# NOTE(review): `new_main_df` is assigned in a later cell of this notebook, so
# this cell fails on a fresh kernel when cells are run top to bottom.
new_main_df.head(5)

Number,ISBN,Month,Year,Title,Score,Author,Publisher,Pages,Author gender,Pub year,Goodreads score,Our score conversion,variance,Debut?,Translated?,Topics
i64,str,str,i64,str,f64,str,str,i64,str,i64,f64,f64,f64,str,str,str
34,"""9780571376483""","""June""",2024,"""Demon Copperhead""",8.192,"""Barbara Kingsolver""","""Faber & Faber""",560,"""Female""",2023,4.5,4.096,-0.404,"""no""","""no""","""Historical"""
34,"""9780571376483""","""June""",2024,"""Demon Copperhead""",8.192,"""Barbara Kingsolver""","""Faber & Faber""",560,"""Female""",2023,4.5,4.096,-0.404,"""no""","""no""","""Contemporary"""
34,"""9780571376483""","""June""",2024,"""Demon Copperhead""",8.192,"""Barbara Kingsolver""","""Faber & Faber""",560,"""Female""",2023,4.5,4.096,-0.404,"""no""","""no""","""Coming of age"""
14,"""9781784744649""","""November""",2022,"""Tomorrow, and Tomorrow, and To…",7.84,"""Gabrielle Zevin""","""Vintage Publishing""",416,"""Female""",2022,4.22,3.92,-0.3,"""no""","""no""","""Contemporary"""
14,"""9781784744649""","""November""",2022,"""Tomorrow, and Tomorrow, and To…",7.84,"""Gabrielle Zevin""","""Vintage Publishing""",416,"""Female""",2022,4.22,3.92,-0.3,"""no""","""no""","""Romance"""


In [47]:

# Split the comma-separated "Topics" string into a list, then emit one row per
# list element so each topic can be counted on its own.
new_main_df = (
    main_df
    .with_columns(pl.col("Topics").str.split(", "))
    .explode("Topics")
)

# Count titles per topic, most frequent first. The aggregated column keeps the
# source column name, so "Title" holds an integer count in this frame.
new_new = (
    new_main_df
    .group_by(pl.col("Topics"))
    .agg(pl.col("Title").count())
    .sort(pl.col("Title"), descending=True)
)

# Relabel the axes: without `labels`, the y-axis would read "Title" even
# though it shows counts.
topics_bar = px.bar(
    new_new,
    x="Topics",
    y="Title",
    title="Books per topic",
    labels={"Topics": "Topic", "Title": "Number of books"},
)

In [51]:
# Scratch check: shows the built-in type of the integer literal 5.
type(5)

int

In [37]:
# Count titles per topic and list the most frequent topics first.
(
    new_main_df
    .group_by("Topics")
    .agg(pl.col("Title").count())
    .sort("Title", descending=True)
)

Topics,Title
str,u32
"""Romance""",16
"""Contemporary""",13
"""Mystery""",9
"""Historical""",8
"""Thriller""",6
…,…
"""Greek mythology""",1
"""Gothic""",1
"""African american""",1
"""Nature""",1
