In [1]:
from collections import defaultdict
from openai import OpenAI
import replicate
from elasticsearch import Elasticsearch
from elasticsearch.exceptions import AuthenticationException, ConnectionError
from transformers import T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer
import chromadb
import os
from dotenv import load_dotenv
import json
from tqdm.auto import tqdm
import itertools
import pandas as pd

load_dotenv()

from utils import search_podcasts
from ingest import create_index, download_podcast, transcribe_podcast, encode_podcast, index_podcast
from rag import rag, search, llm

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package punkt_tab to
[nltk_data]     /home/minasonbol/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


# Setup

In [2]:
def update_session(**kwargs):
    """Merge the given keyword arguments into the module-level ``session_state``.

    Keys that already exist are overwritten; keys not mentioned are left as-is.
    """
    session_state.update(kwargs)

def text_input(input_text):
    """Show ``input_text`` as a prompt on stdin and return the line the user typed."""
    typed = input(input_text)
    return typed

def choose_podcast_option(episode_option):
    """Record which podcast episode the user wants to work with.

    All results are written through ``update_session``. The flag
    ``episode_option_selected`` is reset to ``False`` first and only set back
    to ``True`` once the chosen option has every input it needs.

    Args:
        episode_option: One of the menu labels ``"1. Try a sample"``,
            ``"2. Provide the iTunes URL for a specific podcast episode"`` or
            ``"3. Provide a name of a podcast to explore its most recent episode"``.
            Any other value leaves the selection unset.
    """
    update_session(episode_option_selected=False)
    if episode_option == "1. Try a sample":
        update_session(episode_option_selected=True, episode_option=episode_option)
    elif episode_option == "2. Provide the iTunes URL for a specific podcast episode":
        episode_url = text_input("Enter the iTunes URL of the episode you want:")
        update_session(episode_option_selected=True, episode_option=episode_option, episode_url=episode_url)
    elif episode_option == "3. Provide a name of a podcast to explore its most recent episode":
        term = text_input("Enter a search term for podcasts:")
        if term == '':
            print("Please enter a valid search term.")
            return
        try:
            found_podcasts = search_podcasts(term)
            if found_podcasts['status'] == 'Fail':
                raise ValueError("podcast search failed")
            podcasts = found_podcasts['podcasts']
            podcast_names = [f"{podcast['collectionName']} by {podcast['artistName']}" for podcast in podcasts]
        except Exception:
            # Best effort: a failed lookup should not abort the notebook.
            print("Please enter a valid search term.")
            return

        # No selectbox widget exists in the notebook, so list the matches and
        # ask for the number of the wanted entry instead.
        for number, name in enumerate(podcast_names, start=1):
            print(f"{number}. {name}")
        choice = text_input("Select a podcast (enter its number):")
        try:
            selected_index = int(choice) - 1
        except ValueError:
            selected_index = -1
        if not 0 <= selected_index < len(podcast_names):
            print("Please enter a valid podcast number.")
            return
        update_session(episode_option_selected=True, episode_option=episode_option, found_podcasts=podcasts, selected_index=selected_index)

def choose_encoder(sentence_encoder):
    """Set up the sentence encoder used to embed text and record it in the session.

    ``sentence_encoder_selected`` is reset to ``False`` first and only set to
    ``True`` when the encoder was created successfully.

    Args:
        sentence_encoder: ``"1. T5"`` loads the
            ``sentence-transformers/sentence-t5-base`` model locally.
            ``"2. OpenAI"`` creates an OpenAI client for
            ``text-embedding-3-large``; the key is read from the
            ``OPENAI_API_KEY`` environment variable. Any other value leaves
            the selection unset.
    """
    update_session(sentence_encoder_selected=False)
    if sentence_encoder == "1. T5":
        encoder = SentenceTransformer("sentence-transformers/sentence-t5-base")
        update_session(sentence_encoder_selected=True, sentence_encoder=sentence_encoder, encoder=encoder)
    elif sentence_encoder == "2. OpenAI":
        embedding_model = "text-embedding-3-large"
        # text_input() accepts only a prompt, so the key comes from the
        # environment; this also keeps the secret out of the notebook output.
        openai_api_key = os.getenv('OPENAI_API_KEY')
        if not openai_api_key:
            print("OPENAI_API_KEY is not set. Please provide a valid API token.")
            return
        try:
            oa_embedding_client = OpenAI(api_key=openai_api_key)
            # Listing models is a cheap request that fails when the key is rejected.
            oa_embedding_client.models.list()
        except Exception:
            print("Invalid API key. Please provide a valid API token.")
            return
        update_session(sentence_encoder_selected=True, sentence_encoder=sentence_encoder, embedding_client=oa_embedding_client, embedding_model=embedding_model)

def choose_transcription_method(transcription_method, session_state):
    """Select how the podcast audio is transcribed and record it in the session.

    Nothing happens until an episode option has been stored. The sample
    episode needs no transcription, so it is marked as selected immediately.
    Results are written through ``update_session``; the ``session_state``
    argument is only read.

    Args:
        transcription_method: ``"1. Replicate"`` builds a Replicate client from
            the ``REPLICATE_API_KEY`` environment variable;
            ``"2. Local transcription"`` needs no client. Any other value
            leaves ``transcription_method_selected`` at ``False``.
        session_state: Mapping that is checked for the ``episode_option`` key.
    """
    if not session_state.get('episode_option', False):
        return
    if session_state['episode_option'] == "1. Try a sample":
        print("The sample podcast doesn't require a transcription method.")
        update_session(transcription_method_selected=True)
        return

    update_session(transcription_method_selected=False)
    if transcription_method == "1. Replicate":
        # os.getenv returns None (not '') for an unset variable, so test truthiness.
        replicate_api_key = os.getenv('REPLICATE_API_KEY')
        if not replicate_api_key:
            print("REPLICATE_API_KEY is not set. Please provide a valid API token.")
            return
        try:
            replicate_client = replicate.Client(api_token=replicate_api_key)
            # Listing models is a cheap request that fails when the token is rejected.
            replicate_client.models.list()
        except Exception:
            print("Invalid API key. Please provide a valid API token.")
            return
        update_session(transcription_method_selected=True, transcription_method=transcription_method, transcription_client=replicate_client)
    elif transcription_method == "2. Local transcription":
        update_session(transcription_method_selected=True, transcription_method=transcription_method)

def choose_vector_db(vector_db):
    """Create the search index for the selected vector store and record it in the session.

    The index name is fixed to ``"podcast-transcriber"``. ``vector_db_selected``
    is reset to ``False`` first and only set to ``True`` (together with
    ``index_created``) after ``create_index`` has returned.

    Args:
        vector_db: ``"1. Minsearch"``, ``"2. Elasticsearch"`` (credentials are
            read from the ``ES_API_KEY`` and ``ES_CLOUD_ID`` environment
            variables) or ``"3. ChromaDB"`` (persistent client stored under
            ``./chroma_db``). Any other value leaves the selection unset.
    """
    update_session(index_name="podcast-transcriber", vector_db_selected=False)
    if vector_db == "1. Minsearch":
        update_session(vector_db=vector_db)
        update_session(index=create_index(**session_state))
        update_session(vector_db_selected=True, index_created=True)
        print(f"Index {session_state['index'].index_name} was created successfully.")
    elif vector_db == "2. Elasticsearch":
        # os.getenv returns None (not '') for an unset variable, so test truthiness.
        elasticsearch_api_key = os.getenv('ES_API_KEY')
        elasticsearch_cloud_id = os.getenv('ES_CLOUD_ID')
        if not elasticsearch_api_key or not elasticsearch_cloud_id:
            print("ES_API_KEY and ES_CLOUD_ID must both be set to use Elasticsearch.")
            return
        try:
            es_client = Elasticsearch(cloud_id=elasticsearch_cloud_id, api_key=elasticsearch_api_key)
            # A health request verifies the credentials and the connection up front.
            es_client.cluster.health()
            update_session(vector_db=vector_db, vector_db_client=es_client)
            update_session(index=create_index(**session_state))
            update_session(vector_db_selected=True, index_created=True)
            # The first key of the stored index mapping is reported as the index name.
            print(f"Index {[k for k, v in session_state['index'].items()][0]} was created successfully.")
        except AuthenticationException:
            print("Invalid API key or Cloud ID. Please provide a valid tokens.")
        except ConnectionError:
            print("Connection error. Could not connect to the cluster.")
        except Exception as e:
            print(f"An error occurred: {e}")
    elif vector_db == "3. ChromaDB":
        chroma_client = chromadb.PersistentClient(path="./chroma_db")
        update_session(vector_db=vector_db, vector_db_client=chroma_client)
        update_session(index=create_index(**session_state))
        update_session(vector_db_selected=True, index_created=True)
        # NOTE(review): this reports the first listed collection, which is only the
        # one just created if the store holds a single collection - confirm.
        print(f"Index {session_state['vector_db_client'].list_collections()[0].name} was created successfully.")

def choose_llm(llm_option):
    """Set up the language model used for answer generation and record it in the session.

    ``llm_option_selected`` is reset to ``False`` first and only set to
    ``True`` once a client or model is available.

    Args:
        llm_option: ``"1. GPT-4o"`` reuses the OpenAI embedding client when the
            session's sentence encoder is ``"2. OpenAI"``; otherwise it builds
            a client from the ``OPENAI_API_KEY`` environment variable.
            ``"2. FLAN-5"`` loads ``google/flan-t5-large`` and its tokenizer.
            Any other value leaves the selection unset.
    """
    update_session(llm_option_selected=False)
    if llm_option == "1. GPT-4o":
        if session_state['sentence_encoder'] == "2. OpenAI":
            # The embedding client was already validated; share it.
            oa_client = session_state['embedding_client']
            update_session(llm_option_selected=True, llm_option=llm_option, llm_client=oa_client)
            return

        # os.getenv returns None (not '') for an unset variable, so test truthiness.
        openai_api_key = os.getenv('OPENAI_API_KEY')
        if not openai_api_key:
            print("OPENAI_API_KEY is not set. Please provide a valid API token.")
            return
        try:
            oa_client = OpenAI(api_key=openai_api_key)
            # Listing models is a cheap request that fails when the key is rejected.
            oa_client.models.list()
        except Exception:
            print("Invalid API key. Please provide a valid API token.")
            return
        update_session(llm_option_selected=True, llm_option=llm_option, llm_client=oa_client)

    elif llm_option == "2. FLAN-5":
        model = T5ForConditionalGeneration.from_pretrained("google/flan-t5-large")
        tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-large")
        update_session(llm_option_selected=True, llm_option=llm_option, llm_client=model, llm_tokenizer=tokenizer)

# Load ground truth

In [3]:
# Read the retrieval ground truth and keep it as a list with one dict per CSV row.
ground_truth = pd.read_csv('sample/ground-truth-retrieval.csv').to_dict(orient='records')

# RAG Evaluation - GPT-4o

In [25]:
# Pipeline configuration for the GPT-4o evaluation run.
# No default factory is supplied, so a missing key raises KeyError like a plain dict.
session_state = defaultdict(
    None,
    {
        "episode_option": "1. Try a sample",
        "sentence_encoder": "1. T5",
        "transcription_method": "1. Replicate",
        "vector_db": "2. Elasticsearch",
        "llm_option": "1. GPT-4o",
    },
)

In [26]:
# Resolve every pipeline component from the menu labels stored in session_state.
choose_podcast_option(session_state['episode_option'])
choose_encoder(session_state['sentence_encoder'])
choose_transcription_method(session_state['transcription_method'], session_state)
choose_vector_db(session_state['vector_db'])
choose_llm(session_state['llm_option'])

# download
episode_details = download_podcast(**session_state)
print(episode_details['status_message'])
if episode_details['status'] == 'Success':
    update_session(episode_details=episode_details, podcast_downloaded=True)
else:
    update_session(podcast_downloaded=False)

# Every stage below reads its gate flag with .get(): session_state has no default
# factory, so indexing a flag that a skipped earlier stage never set raises KeyError.

# transcribe
if session_state.get('podcast_downloaded', False) and not session_state.get('interaction_started', False):
    session_state['episode_details'].update(transcribe_podcast(**session_state))
    update_session(podcast_transcribed=True)

# encode (Minsearch needs no embeddings, so it is only flagged as done)
if session_state.get('podcast_transcribed', False) and not session_state.get('interaction_started', False):
    if session_state['vector_db'] != "1. Minsearch":
        session_state['episode_details'].update(encode_podcast(**session_state))
    update_session(podcast_embedded=True)

# populate index
if session_state.get('podcast_embedded', False) and not session_state.get('interaction_started', False):
    index_podcast(**session_state)
    update_session(podcast_indexed=True)

The sample podcast doesn't require a transcription method.
Index podcast-transcriber was created successfully.
Podcast Past Present Future downloaded successfully.


In [27]:
# Ask for five results per query, then run one RAG call on a fixed ground-truth question.
session_state.update(num_results=5)
query = ground_truth[1231]['question']
# Materialise the output so it can be joined again in a later cell.
result = list(rag(query, **session_state))

In [28]:
# Prompt for the LLM-as-judge step: it is filled in with str.format using the
# `question` and `answer_llm` fields and asks for a JSON object with the keys
# "Relevance" and "Explanation". The doubled braces are str.format escapes that
# render as literal { } in the final prompt.
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()


In [29]:
# Fixed ground-truth question used for the spot checks in the next cells.
question = ground_truth[1231]["question"]
question

'How did the Bolsheviks view terror in their methods?'

In [30]:
# Generate an answer for the spot-check question by joining the string pieces produced by rag().
answer_llm = "".join(rag(question, **session_state))
answer_llm

'The Bolsheviks were suspected of being far too comfortable with using terror as a tactic. Terror became a hallmark of Bolshevism, serving as a byword for their methods and the threat that they posed. '

In [31]:
# Preview the judge prompt.
# NOTE(review): this joins `result` from the earlier smoke-test cell, not the
# answer generated in the cell just above - confirm that is intended.
answer_llm = "".join(result)
prompt = prompt2_template.format(answer_llm=answer_llm, question=question)
print(prompt)


You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: How did the Bolsheviks view terror in their methods?
Generated Answer: The Bolsheviks were suspected of being far too comfortable with using terror as a tactic. Bolshevism became synonymous with terror, highlighting the extent to which they were associated with this method. 

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [32]:
# Draw a reproducible 200-question sample (fixed random_state) for the LLM-as-judge run.
df_question = pd.DataFrame(ground_truth)
df_sample = df_question.sample(200, random_state=1)
sample = df_sample.to_dict('records')

In [33]:
# Dry run of the judge loop on the first sampled record only; the raw judge
# reply is printed so its JSON shape can be inspected before the full run.
evaluations = []

for record in tqdm(sample[:1]):
    question = record['question']
    answer_llm = "".join(rag(question, **session_state))
    prompt = prompt2_template.format(answer_llm=answer_llm, question=question)

    evaluation = llm(prompt, **session_state)
    print(evaluation)
    evaluation = json.loads(evaluation)

    evaluations.append((record, answer_llm, evaluation))


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 1/1 [00:04<00:00,  4.85s/it]

{
  "Relevance": "RELEVANT",
  "Explanation": "The generated answer directly addresses the question by explaining why coherence in the ideological platform was significant for the Russian revolution. It highlights how a unified set of ideas and goals was essential for organizing and mobilizing revolutionary forces, which is a key aspect of the importance of coherence in such a context."
}





In [12]:
# Full LLM-as-judge run: answer every sampled question with the RAG pipeline,
# then ask the LLM to grade the answer's relevance.
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = "".join(rag(question, **session_state))
    prompt = prompt2_template.format(question=question, answer_llm=answer_llm)

    evaluation = llm(prompt, **session_state)
    try:
        evaluation = json.loads(evaluation)
    except json.JSONDecodeError as error:
        # One malformed judge reply must not abort a run that takes many minutes;
        # skip the record and report it so the gap is visible.
        print(f"Skipping record {record.get('id')}: judge output is not valid JSON ({error})")
        continue

    evaluations.append((record, answer_llm, evaluation))


100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [17:11<00:00,  5.16s/it]


In [14]:
# Flatten the (record, answer, evaluation) tuples into one table: pull the id and
# question out of each record and the verdict fields out of each judge reply.
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])
df_eval['id'] = df_eval['record'].map(lambda rec: rec['id'])
df_eval['question'] = df_eval['record'].map(lambda rec: rec['question'])
df_eval['relevance'] = df_eval['evaluation'].map(lambda verdict: verdict['Relevance'])
df_eval['explanation'] = df_eval['evaluation'].map(lambda verdict: verdict['Explanation'])

# The nested source columns are no longer needed once they are unpacked.
df_eval = df_eval.drop(columns=['record', 'evaluation'])

In [15]:
# Share of each relevance label among the judged answers.
df_eval['relevance'].value_counts(normalize=True)

relevance
PARTLY_RELEVANT    0.545
RELEVANT           0.360
NON_RELEVANT       0.095
Name: proportion, dtype: float64

# RAG Evaluation - FLAN-T5

In [34]:
# Pipeline configuration for the FLAN evaluation run; only the LLM option differs
# from the GPT-4o run. No default factory is supplied, so a missing key raises KeyError.
session_state = defaultdict(
    None,
    {
        "episode_option": "1. Try a sample",
        "sentence_encoder": "1. T5",
        "transcription_method": "1. Replicate",
        "vector_db": "2. Elasticsearch",
        "llm_option": "2. FLAN-5",
    },
)

In [35]:
# Resolve every pipeline component from the menu labels stored in session_state.
choose_podcast_option(session_state['episode_option'])
choose_encoder(session_state['sentence_encoder'])
choose_transcription_method(session_state['transcription_method'], session_state)
choose_vector_db(session_state['vector_db'])
choose_llm(session_state['llm_option'])

# download
episode_details = download_podcast(**session_state)
print(episode_details['status_message'])
if episode_details['status'] == 'Success':
    update_session(episode_details=episode_details, podcast_downloaded=True)
else:
    update_session(podcast_downloaded=False)

# Every stage below reads its gate flag with .get(): session_state has no default
# factory, so indexing a flag that a skipped earlier stage never set raises KeyError.

# transcribe
if session_state.get('podcast_downloaded', False) and not session_state.get('interaction_started', False):
    session_state['episode_details'].update(transcribe_podcast(**session_state))
    update_session(podcast_transcribed=True)

# encode (Minsearch needs no embeddings, so it is only flagged as done)
if session_state.get('podcast_transcribed', False) and not session_state.get('interaction_started', False):
    if session_state['vector_db'] != "1. Minsearch":
        session_state['episode_details'].update(encode_podcast(**session_state))
    update_session(podcast_embedded=True)

# populate index
if session_state.get('podcast_embedded', False) and not session_state.get('interaction_started', False):
    index_podcast(**session_state)
    update_session(podcast_indexed=True)



The sample podcast doesn't require a transcription method.
Index podcast-transcriber was created successfully.
Podcast Past Present Future downloaded successfully.


In [36]:
# Ask for five results per query, then run one RAG call on a fixed ground-truth question.
session_state.update(num_results=5)
query = ground_truth[1231]['question']
# Materialise the output so it can be joined again in a later cell.
result = list(rag(query, **session_state))

In [47]:
# Judge prompt for the FLAN run (same text as the template defined in the GPT-4o
# section). It is filled in with str.format using `question` and `answer_llm`;
# the doubled braces are str.format escapes that render as literal { }.
prompt2_template = """
You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: {question}
Generated Answer: {answer_llm}

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}}
""".strip()


In [48]:
# Fixed ground-truth question used for the spot checks in the next cells.
question = ground_truth[1231]["question"]
question

'How did the Bolsheviks view terror in their methods?'

In [49]:
# Generate an answer for the spot-check question by joining the string pieces produced by rag().
answer_llm = "".join(rag(question, **session_state))
answer_llm

'far too comfortable with terror as a tactic '

In [50]:
# Preview the judge prompt.
# NOTE(review): this joins `result` from the earlier smoke-test cell, not the
# answer generated in the cell just above - confirm that is intended.
answer_llm = "".join(result)
prompt = prompt2_template.format(answer_llm=answer_llm, question=question)
print(prompt)


You are an expert evaluator for a RAG system.
Your task is to analyze the relevance of the generated answer to the given question.
Based on the relevance of the generated answer, you will classify it
as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

Here is the data for evaluation:

Question: How did the Bolsheviks view terror in their methods?
Generated Answer: far too comfortable with terror as a tactic 

Please analyze the content and context of the generated answer in relation to the question
and provide your evaluation in parsable JSON without using code blocks:

{
  "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
  "Explanation": "[Provide a brief explanation for your evaluation]"
}


In [52]:
# Full judge run with the FLAN configuration: answer every sampled question,
# then ask the configured LLM to grade the answer.
evaluations = []

for record in tqdm(sample):
    question = record['question']
    answer_llm = "".join(rag(question, **session_state))
    prompt = prompt2_template.format(answer_llm=answer_llm, question=question)

    # The judge reply is stored as the raw value returned by llm(); it is not parsed as JSON here.
    evaluation = llm(prompt, **session_state)

    evaluations.append((record, answer_llm, evaluation))


100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [1:41:56<00:00, 30.58s/it]


In [55]:
# Flatten the (record, answer, evaluation) tuples into one table. The judge reply
# was stored unparsed, so it is used directly as the relevance label.
df_eval = pd.DataFrame(evaluations, columns=['record', 'answer', 'evaluation'])
df_eval['id'] = df_eval['record'].map(lambda rec: rec['id'])
df_eval['question'] = df_eval['record'].map(lambda rec: rec['question'])
df_eval['relevance'] = df_eval['evaluation']

# The source columns are no longer needed once they are unpacked.
df_eval = df_eval.drop(columns=['record', 'evaluation'])

In [56]:
# Share of each relevance label among the judged answers.
df_eval['relevance'].value_counts(normalize=True)

relevance
PARTLY_RELEVANT    0.61
NON_RELEVANT       0.39
Name: proportion, dtype: float64

In [66]:
import pandas as pd
import altair as alt

# Retrieval metrics per search engine in long form: three consecutive rows
# (hit_rate, mrr, average_retrieval_time) for each system.
data = {
    'system': ['ChromaDB'] * 3 + ['Minsearch'] * 3 + ['Elasticsearch'] * 3,
    'metric': ['hit_rate', 'mrr', 'average_retrieval_time'] * 3,
    'value': [
        0.7024208566108008, 0.29600869025450816, 0.022532762051516626,  # ChromaDB
        0.7169459962756052, 0.5400331057314296, 0.005704473250405083,   # Minsearch
        0.7415270018621974, 0.6090006207324642, 0.08933900712366655,    # Elasticsearch
    ],
}

df = pd.DataFrame(data)

# One bar panel per metric; each panel gets its own y scale, all share the 'Value' title.
chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('system:N', title='System'),
        alt.Y('value:Q', title='Value', scale=alt.Scale(zero=False)),
        alt.Color('system:N', title='System'),
    )
    .facet(
        alt.Facet('metric:N', title='Metric', header=alt.Header(labelAngle=0)),
        columns=3,
    )
    .resolve_scale(y='independent')
    .properties(title='Comparison of Metrics for ChromaDB, Minsearch, and Elasticsearch')
)

chart.show()

In [67]:
import pandas as pd
import altair as alt

# Retrieval metrics per search engine in long form: three consecutive rows
# (hit_rate, mrr, average_retrieval_time) for each system.
data = {
    'system': ['ChromaDB'] * 3 + ['Minsearch'] * 3 + ['Elasticsearch'] * 3,
    'metric': ['hit_rate', 'mrr', 'average_retrieval_time'] * 3,
    'value': [
        0.7024208566108008, 0.29600869025450816, 0.022532762051516626,  # ChromaDB
        0.7169459962756052, 0.5400331057314296, 0.005704473250405083,   # Minsearch
        0.7415270018621974, 0.6090006207324642, 0.08933900712366655,    # Elasticsearch
    ],
}

df = pd.DataFrame(data)

# Same faceted chart, but the x axis carries no labels or ticks: the colour
# legend alone identifies the system. Each panel keeps its own y scale.
chart = (
    alt.Chart(df)
    .mark_bar()
    .encode(
        alt.X('system:N', title=None, axis=alt.Axis(labels=False, ticks=False)),
        alt.Y('value:Q', title='Value', scale=alt.Scale(zero=False)),
        alt.Color('system:N', title=None),
    )
    .facet(
        alt.Facet('metric:N', title='Metric', header=alt.Header(labelAngle=0)),
        columns=3,
    )
    .resolve_scale(y='independent')
    .properties(title='Comparison of Metrics for ChromaDB, Minsearch, and Elasticsearch')
)

chart.show()

In [69]:
import pandas as pd
import altair as alt

# Retrieval metrics per search engine in long form: three consecutive rows
# (Hit Rate, MRR, Average Retrieval Time) for each system.
data = {
    'system': ['ChromaDB', 'ChromaDB', 'ChromaDB',
               'Minsearch', 'Minsearch', 'Minsearch',
               'Elasticsearch', 'Elasticsearch', 'Elasticsearch'],
    'metric': ['Hit Rate', 'MRR', 'Average Retrieval Time',
               'Hit Rate', 'MRR', 'Average Retrieval Time',
               'Hit Rate', 'MRR', 'Average Retrieval Time'],
    'value': [0.7024208566108008, 0.29600869025450816, 0.022532762051516626,
              0.7169459962756052, 0.5400331057314296, 0.005704473250405083,
              0.7415270018621974, 0.6090006207324642, 0.08933900712366655]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Build one bar chart per metric so each can carry its own y-axis title.
charts = []
metrics = df['metric'].unique()

for metric in metrics:
    # Hit Rate and MRR are stored as 0-1 fractions, not percentages, so the
    # axis is labelled as a score rather than '%'.
    y_title = 'Seconds' if metric == 'Average Retrieval Time' else 'Score (0-1)'
    chart = alt.Chart(df[df['metric'] == metric]).mark_bar().encode(
        x=alt.X('system:N', title=None, axis=alt.Axis(labels=False, ticks=False)),
        y=alt.Y('value:Q', title=y_title, scale=alt.Scale(zero=False)),
        color=alt.Color('system:N', title=None)
    ).properties(
        width=200,
        title=metric
    )
    charts.append(chart)

# Place the panels side by side, each with its own y scale.
final_chart = alt.hconcat(*charts).resolve_scale(y='independent').properties(
    title='Comparison of Metrics for ChromaDB, Minsearch, and Elasticsearch'
)

final_chart.show()

In [72]:
import pandas as pd
import altair as alt

# Retrieval metrics per search engine in long form: three consecutive rows
# (Hit Rate, MRR, Average Retrieval Time) for each system.
data = {
    'system': ['ChromaDB', 'ChromaDB', 'ChromaDB',
               'Minsearch', 'Minsearch', 'Minsearch',
               'Elasticsearch', 'Elasticsearch', 'Elasticsearch'],
    'metric': ['Hit Rate', 'MRR', 'Average Retrieval Time',
               'Hit Rate', 'MRR', 'Average Retrieval Time',
               'Hit Rate', 'MRR', 'Average Retrieval Time'],
    'value': [0.7024208566108008, 0.29600869025450816, 0.022532762051516626,
              0.7169459962756052, 0.5400331057314296, 0.005704473250405083,
              0.7415270018621974, 0.6090006207324642, 0.08933900712366655]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Build one bar chart per metric so each can carry its own y-axis title.
charts = []
metrics = df['metric'].unique()

for metric in metrics:
    # Hit Rate and MRR are stored as 0-1 fractions, not percentages, so the
    # axis is labelled as a score rather than '%'.
    y_title = 'Seconds' if metric == 'Average Retrieval Time' else 'Score (0-1)'
    chart = alt.Chart(df[df['metric'] == metric]).mark_bar().encode(
        x=alt.X('system:N', title=None, axis=alt.Axis(labels=False, ticks=False)),
        y=alt.Y('value:Q', title=y_title, scale=alt.Scale(zero=False)),
        color=alt.Color('system:N', title=None)
    ).properties(
        width=200,
        title=metric
    )
    charts.append(chart)

# Place the panels side by side, each with its own y scale (no overall title).
final_chart = alt.hconcat(*charts).resolve_scale(y='independent')

final_chart.show()

In [71]:
import pandas as pd
import altair as alt

# Retrieval metrics per search engine in long form: three consecutive rows
# (Hit Rate, MRR, Average Retrieval Time) for each system.
data = {
    'system': ['ChromaDB', 'ChromaDB', 'ChromaDB',
               'Minsearch', 'Minsearch', 'Minsearch',
               'Elasticsearch', 'Elasticsearch', 'Elasticsearch'],
    'metric': ['Hit Rate', 'MRR', 'Average Retrieval Time',
               'Hit Rate', 'MRR', 'Average Retrieval Time',
               'Hit Rate', 'MRR', 'Average Retrieval Time'],
    'value': [0.7024208566108008, 0.29600869025450816, 0.022532762051516626,
              0.7169459962756052, 0.5400331057314296, 0.005704473250405083,
              0.7415270018621974, 0.6090006207324642, 0.08933900712366655]
}

# Create a DataFrame
df = pd.DataFrame(data)

# Build one bar chart per metric; systems are named on the x axis, so no legend is drawn.
charts = []
metrics = df['metric'].unique()

for metric in metrics:
    # The panels have no chart title here, so the y-axis title names the metric
    # and its unit. Hit Rate and MRR are 0-1 fractions, not percentages.
    y_title = f'{metric} (s)' if metric == 'Average Retrieval Time' else f'{metric} (0-1)'
    chart = alt.Chart(df[df['metric'] == metric]).mark_bar().encode(
        x=alt.X('system:N', title=None, axis=alt.Axis(labels=True, ticks=True)),
        y=alt.Y('value:Q', title=y_title, scale=alt.Scale(zero=False)),
        color=alt.Color('system:N', legend=None)
    ).properties(
        width=200
    )
    charts.append(chart)

# Place the panels side by side, each with its own y scale.
final_chart = alt.hconcat(*charts).resolve_scale(y='independent')

final_chart.show()