In [2]:
import dotenv
# Load environment variables from the .env file before anything reads them.
# The call is made outside the assert so it still runs when assertions are
# disabled (python -O strips assert statements, side effects included).
dotenv_loaded = dotenv.load_dotenv()
assert dotenv_loaded, "load_dotenv() returned False: no .env file found or no variables were set"

In [3]:
# Import required libraries
import os
from langchain_aws import ChatBedrock

# Bedrock model identifier for Claude. Alternatives kept for quick switching:
#   "anthropic.claude-3-haiku-20240307-v1:0"
#   "anthropic.claude-3-sonnet-20240229-v1:0"
MODEL_ID = "anthropic.claude-3-5-sonnet-20240620-v1:0"

# Chat model instance that is later passed to the crew.
llm = ChatBedrock(
    model_id=MODEL_ID,
    model_kwargs=dict(temperature=0.55),
)

In [4]:
from src.static.submission import Submission
from src.static.util import PROJECT_ROOT
import src.submission.tools.database as db_tools
import src.submission.tools.chart as chart

sagemaker.config INFO - Not applying SDK defaults from location: /etc/xdg/sagemaker/config.yaml
sagemaker.config INFO - Not applying SDK defaults from location: /home/ec2-user/.config/sagemaker/config.yaml


In [5]:
from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task
from langchain_core.tools import tool

In [6]:
@CrewBase
class AdvancedPIRLSCrew(Submission):
    """Data Analysis Crew for the GDSC project.

    Defines two agents (a delegating lead analyst and a data engineer with
    database tools) and a single question-answering task, run sequentially.
    """

    # Locations of the YAML files describing the agents and the tasks.
    # NOTE(review): the methods below index these by key, so @CrewBase
    # presumably swaps the paths for the parsed YAML content — confirm.
    agents_config = PROJECT_ROOT / 'submission' / 'config' / 'agents.yaml'
    tasks_config = PROJECT_ROOT / 'submission' / 'config' / 'tasks.yaml'

    def __init__(self, llm):
        """Remember the language model that every agent is built with."""
        # Single shared model; the low_llm / high_llm variants are left disabled.
        self.llm = llm

    def run(self, prompt: str) -> str:
        """Kick off the crew with ``prompt`` as ``user_question`` and return the raw output."""
        crew_inputs = {'user_question': prompt}
        outcome = self.crew().kickoff(inputs=crew_inputs)
        return outcome.raw

    @agent
    def lead_data_analyst(self) -> Agent:
        """Build the lead analyst agent; it is the only one allowed to delegate."""
        analyst_settings = self.agents_config['lead_data_analyst']
        return Agent(
            config=analyst_settings,
            llm=self.llm,
            verbose=True,
            allow_delegation=True,
        )

    @agent
    def data_engineer(self) -> Agent:
        """Build the data engineer agent, equipped with the database tools."""
        engineer_settings = self.agents_config['data_engineer']
        database_tools = [
            db_tools.query_database,
            db_tools.get_possible_answers_to_question,
            db_tools.get_questions_of_given_type,
        ]
        return Agent(
            config=engineer_settings,
            llm=self.llm,
            verbose=True,
            allow_delegation=False,
            tools=database_tools,
        )

    @task
    def answer_question_task(self) -> Task:
        """Build the answering task, owned by the lead analyst and given the chart tool."""
        task_settings = self.tasks_config['answer_question_task']
        owner = self.lead_data_analyst()
        return Task(
            config=task_settings,
            agent=owner,
            tools=[chart.generate_chart],
        )

    @crew
    def crew(self) -> Crew:
        """Creates the data analyst crew"""
        # self.agents / self.tasks are not assigned in this class; presumably
        # the CrewBase machinery collects them from the decorated methods.
        crew_options = {
            'agents': self.agents,
            'tasks': self.tasks,
            'process': Process.sequential,
            'verbose': True,
            # NOTE(review): max_iter looks like an Agent-level setting in
            # CrewAI; confirm that Crew actually honors it here.
            'max_iter': 5,
            'cache': True,
        }
        return Crew(**crew_options)

In [19]:
# Use a dedicated name for the instance: binding it to `crew` would overwrite
# the `crew` decorator imported from crewai.project, and re-running the class
# definition cell afterwards would then fail.
pirls_crew = AdvancedPIRLSCrew(llm=llm)

print(pirls_crew.run("How do teacher's job satisfaction relate to student learning outcomes, as reflected by their level of reading proficiency?"))

[1m[95m [2024-10-24 19:09:52][DEBUG]: == Working Agent: PIRLS Lead Data Analyst
[00m
[1m[95m [2024-10-24 19:09:52][INFO]: == Starting Task: Provide a comprehensive, data-driven response to the following question:     How do teacher's job satisfaction relate to student learning outcomes, as reflected by their level of reading proficiency?
Utilize the PIRLS 2021 dataset to support your analysis, adhering to these principles: - Focus exclusively on your expertise as a PIRLS data analyst - Ensure accuracy and relevance in addressing the research question - Explain your methodology and reasoning clearly - Support claims with evidence from the PIRLS dataset - Generate logically consistent and well-reasoned conclusions - Contextualize findings within broader educational trends when appropriate - Identify any limitations in the data or analysis - Use markdown format for your final answer. - Be short and descriptive in your findings.
[00m


[1m> Entering new CrewAgentExecutor chain...[0


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.boxplot(


[95m 

https://gdsc-bucket-891377155936.s3.amazonaws.com/teacher_satisfaction_vs_reading_scores.png
[00m
[32;1m[1;3mThought: Now that I have successfully generated a chart visualizing the relationship between teacher job satisfaction and student reading scores, I can analyze the data and provide a comprehensive response to the original question.

Final Answer:

#GDSC7

# How Teacher Job Satisfaction Relates to Student Learning Outcomes in Reading Proficiency 📚👩‍🏫

Based on the analysis of the PIRLS 2021 dataset, we can observe a clear relationship between teacher job satisfaction and student learning outcomes in reading proficiency. Here's a breakdown of our findings:

## 📊 Data Visualization

![Teacher Job Satisfaction vs Student Reading Scores](https://gdsc-bucket-891377155936.s3.amazonaws.com/teacher_satisfaction_vs_reading_scores.png)

## 📈 Key Observations

1. **Positive Correlation**: There appears to be a positive correlation between teacher job satisfaction and student read

In [12]:
# Use a dedicated name for the instance: binding it to `crew` would overwrite
# the `crew` decorator imported from crewai.project.
pirls_crew = AdvancedPIRLSCrew(llm=llm)

# print(pirls_crew.run("Explore how individual, school, and national factors simultaneously impact reading literacy in Tunisia."))

In [None]:
# Use a dedicated name for the instance: binding it to `crew` would overwrite
# the `crew` decorator imported from crewai.project, and re-running the class
# definition cell afterwards would then fail.
pirls_crew = AdvancedPIRLSCrew(llm=llm)

print(pirls_crew.run("When looking at the PIRLS 2021 study I found that there are big differences in the reading capabilities between students of different countries and regions. What are the main drivers behind this?"))

[1m[95m [2024-10-24 19:16:06][DEBUG]: == Working Agent: PIRLS Lead Data Analyst
[00m
[1m[95m [2024-10-24 19:16:06][INFO]: == Starting Task: Provide a comprehensive, data-driven response to the following question:     When looking at the PIRLS 2021 study I found that there are big differences in the reading capabilities between students of different countries and regions. What are the main drivers behind this?
Utilize the PIRLS 2021 dataset to support your analysis, adhering to these principles: - Focus exclusively on your expertise as a PIRLS data analyst - Ensure accuracy and relevance in addressing the research question - Explain your methodology and reasoning clearly - Support claims with evidence from the PIRLS dataset - Generate logically consistent and well-reasoned conclusions - Contextualize findings within broader educational trends when appropriate - Identify any limitations in the data or analysis - Use markdown format for your final answer. - Be short and descriptive i

  sns.scatterplot(


[95m 

https://gdsc-bucket-891377155936.s3.amazonaws.com/books_vs_reading_scores.png
[00m
