# In [None]:
#region LIBRARIES
import json
import os
import re
import traceback
from collections import Counter
from datetime import datetime
from getpass import getpass
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import requests
from openai import OpenAI
#endregion

#region INPUT FIELDS
# Read the API key without echoing it, so the secret does not end up in the
# terminal scrollback or in saved notebook output.
apiKey = getpass("Enter Your OpenAI API Key:").strip()
# Surrounding whitespace is stripped because these values are forwarded
# verbatim as query parameters to the data APIs, where a stray space could
# prevent a match.
studentName = input("Enter the Student Name: ").strip()
studentEmail = input("Enter Student Email: ").strip()
subject = input("Enter the Subject: ").strip()
startDate = input("Enter the Start Date (YYYY-MM-DD): ").strip()
endDate = input("Enter the End Date (YYYY-MM-DD): ").strip()
#endregion

#region GLOBAL VARIABLES
# OpenAI client authenticated with the key entered by the user.
client = OpenAI(api_key=apiKey)

# Caches for the two data documents; they stay None until the corresponding
# fetch function stores a result.
studentAcademicDataJSON = None
detailedSessionCoachingDataJSON = None

# Price per 1,000 tokens for prompt (input) and completion (output) text.
COST_PER_THOUSAND_TOKENS_INPUT = 0.01
COST_PER_THOUSAND_TOKENS_OUTPUT = 0.03

# Token counts and costs, unset until calculated.
inputTokenNum = outputTokenNum = None
inputCost = outputCost = None
#endregion

#region FUNCTIONS
def getStudentAcademicData(studentName, subject, startDate, endDate): # Get Student Academic Data JSON
    """Fetch the student's academic data and cache it in ``studentAcademicDataJSON``.

    The lookup is two-step: the first endpoint is queried with the student,
    subject and date range, and the first value of its JSON reply is used as
    the URL from which the actual data document is downloaded.

    Args:
        studentName: Value sent as the ``student`` query parameter.
        subject: Value sent as the ``subject`` query parameter.
        startDate: Value sent as the ``startDate`` query parameter.
        endDate: Value sent as the ``endDate`` query parameter.

    Returns:
        The parsed JSON document on success. On failure, a human-readable
        message string; in that case ``studentAcademicDataJSON`` is left
        unchanged.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """

    global studentAcademicDataJSON

    # Fail instead of hanging forever if an endpoint stops responding.
    requestTimeoutSeconds = 60

    studentAcademicDataApiUrl = "https://hpjgmbk65zgzff64b6y7vhw2ua0sytlh.lambda-url.us-east-1.on.aws/"
    # SECURITY(review): this credential is hard-coded in source; it should be
    # loaded from the environment or a secret store instead.
    studentAcademicDataApiHeaders = {
        "Authorization": "-ehN=MkFaIk/iDMinaaoar?fJwbRlKpqEqWQ11cFxU=ibqwQe9Uj0eF6XP-6JgvT"
    }
    studentAcademicDataApiParams = {
        "student": studentName,
        "subject": subject,
        "startDate": startDate,
        "endDate": endDate
    }
    studentAcademicDataApiResponse = requests.get(
        studentAcademicDataApiUrl,
        headers=studentAcademicDataApiHeaders,
        params=studentAcademicDataApiParams,
        timeout=requestTimeoutSeconds,
    )

    if studentAcademicDataApiResponse.status_code != 200:
        return f"Request failed with status code: {studentAcademicDataApiResponse.status_code}"

    # The first value of the reply object is the URL of the real data document.
    responseBody = studentAcademicDataApiResponse.json()
    dataUrl = next(iter(responseBody.values()), None) if isinstance(responseBody, dict) else None
    if not dataUrl:
        # Previously this fallback text was itself passed to requests.get as a
        # URL, which failed with an unrelated "invalid URL" error.
        return 'No values found in response'

    dataResponse = requests.get(str(dataUrl), timeout=requestTimeoutSeconds)
    if dataResponse.status_code != 200:
        return f"Request failed with status code: {dataResponse.status_code}"

    studentAcademicDataJSON = dataResponse.json()
    return studentAcademicDataJSON

def detailedSessionCoachingData(studentEmail, subject, startDate, endDate): # Get Detailed Session Coaching Data JSON
    """Fetch detailed session coaching data and cache it in ``detailedSessionCoachingDataJSON``.

    Args:
        studentEmail: Value sent as the ``email`` query parameter.
        subject: Value sent as the ``subject`` query parameter.
        startDate: Value sent as the ``startDate`` query parameter.
        endDate: Value sent as the ``endDate`` query parameter.

    Returns:
        The parsed JSON reply when the endpoint answers with status 200.
        Otherwise a message string containing the status code; in that case
        ``detailedSessionCoachingDataJSON`` is left unchanged.

    Raises:
        requests.RequestException: On connection errors or timeouts.
    """

    global detailedSessionCoachingDataJSON

    detailedSessionCoachingDataApiUrl = "https://mk62mbxkbe6flyv44eli5shrhi0hinml.lambda-url.us-east-1.on.aws/"
    # SECURITY(review): this credential is hard-coded in source; it should be
    # loaded from the environment or a secret store instead.
    detailedSessionCoachingDataApiHeaders = {
        "Authorization": "qVSA4LC9SdTrsKezwHlhD9r=5cH5tPZLrG3e24kRZNaLrnLIm4jk0cN2YfgquIRN"
    }
    detailedSessionCoachingDataApiParams = {
        "email": studentEmail,
        "subject": subject,
        "startDate": startDate,
        "endDate": endDate
    }
    detailedSessionCoachingDataApiResponse = requests.get(
        detailedSessionCoachingDataApiUrl,
        headers=detailedSessionCoachingDataApiHeaders,
        params=detailedSessionCoachingDataApiParams,
        timeout=60,  # fail instead of hanging forever on an unresponsive endpoint
    )

    if detailedSessionCoachingDataApiResponse.status_code != 200:
        return f"Request failed with status code: {detailedSessionCoachingDataApiResponse.status_code}"

    # Deliberately not named ``json``: that would shadow the imported module
    # inside this function.
    responseBody = detailedSessionCoachingDataApiResponse.json()
    detailedSessionCoachingDataJSON = responseBody
    return detailedSessionCoachingDataJSON

def calculateTokensInput(text): # Calculates the number of tokens in the given text.
    """Count the whitespace-separated words in ``text`` and record the count.

    The count is stored in the module-level ``inputTokenNum`` and returned.
    Note that this is a word count, which only approximates how a model
    tokenizes text.
    """
    global inputTokenNum

    words = text.split()
    inputTokenNum = len(words)
    return inputTokenNum

def calculateCostInput(numTokens): # Calculates the cost based on the number of tokens.
    """Return the input cost for ``numTokens`` tokens and record it.

    The cost is the token count in thousands multiplied by
    ``COST_PER_THOUSAND_TOKENS_INPUT``; it is also stored in the module-level
    ``inputCost``.
    """
    global inputCost

    tokensInThousands = numTokens / 1000
    inputCost = tokensInThousands * COST_PER_THOUSAND_TOKENS_INPUT
    return inputCost

def calculateTokensOutput(text): # Calculates the number of tokens in the given text.
    """Return the number of whitespace-separated words in ``text``.

    Unlike ``calculateTokensInput``, the result is only returned and not
    stored in a module-level variable.
    """
    wordCount = len(text.split())
    return wordCount

def calculateCostOutput(numTokens): # Calculates the cost based on the number of tokens.
    """Return the output cost for ``numTokens`` tokens.

    The cost is the token count in thousands multiplied by
    ``COST_PER_THOUSAND_TOKENS_OUTPUT``.
    """
    tokensInThousands = numTokens / 1000
    return tokensInThousands * COST_PER_THOUSAND_TOKENS_OUTPUT

def ExtractCodeSnippets(gptOutputMessageString): # Function to extract code snippets from the provided string
    """Return the bodies of all ```python fenced code blocks in the given text.

    Args:
        gptOutputMessageString: Text that may contain fenced code blocks.
            ``None`` or an empty string yields an empty list.

    Returns:
        A list with the contents of each fenced block (without the fence
        lines), in order of appearance.
    """
    if not gptOutputMessageString:
        return []
    # Allow trailing spaces/tabs after the language tag and Windows line
    # endings, so a fence like "```python \r\n" is not silently skipped.
    pattern = r"```python[ \t]*\r?\n(.*?)```"
    return re.findall(pattern, gptOutputMessageString, re.DOTALL)

def ExecuteAndDisplayPlots(gptOutputMessageCodeSnippets): # Function to execute code snippets and display plots inline
    """Execute each code snippet in turn so that any plots it draws are displayed.

    Every snippet runs in a copy of this module's globals, so names already
    imported by the file are available to it, while assignments made by a
    snippet do not overwrite the module's own variables. The same namespace
    is shared by all snippets of one call.

    A snippet that raises is reported with its traceback and skipped, so one
    broken chart does not prevent the remaining charts from being drawn.

    Args:
        gptOutputMessageCodeSnippets: Iterable of Python source strings.
    """
    # SECURITY(review): the snippets are model-generated text and run with
    # full interpreter privileges. Only use this on output you trust, or move
    # execution into a sandbox.

    # A single dict is passed as the namespace on purpose: a bare exec(code)
    # inside a function uses separate globals and locals, so functions,
    # lambdas and generator expressions defined by a snippet cannot see the
    # snippet's own top-level variables and fail with NameError.
    snippetNamespace = dict(globals())
    for snippetNumber, code in enumerate(gptOutputMessageCodeSnippets, start=1):
        try:
            # Dynamically execute each code snippet
            exec(code, snippetNamespace)
        except Exception:
            print(f"Code snippet {snippetNumber} failed and was skipped:")
            traceback.print_exc()
#endregion

#region ACADEMIC DATA API CALLS
# Both fetch functions signal failure by returning a message string and
# leaving their cache variable at None. Stop here in that case, rather than
# serialising None to "null" and paying for a GPT call that has no data.
academicDataResult = getStudentAcademicData(studentName, subject, startDate, endDate)
if studentAcademicDataJSON is None:
    raise RuntimeError(f"Could not load student academic data: {academicDataResult}")

coachingDataResult = detailedSessionCoachingData(studentEmail, subject, startDate, endDate)
if detailedSessionCoachingDataJSON is None:
    raise RuntimeError(f"Could not load detailed session coaching data: {coachingDataResult}")

# Pretty-printed JSON text that is embedded into the prompt.
studentAcademicDataStrForGpt = json.dumps(studentAcademicDataJSON, indent=2)
detailedSessionCoachingDataStrForGpt = json.dumps(detailedSessionCoachingDataJSON, indent=2)
#endregion

#region GPT API CALL
# Analysis prompt sent as the user message. The two JSON documents are
# embedded verbatim; the remainder defines the questions (Q1-Q8) and the
# required answer structure.
prompt = f"""
WORKFLOW:

1 - You analyze JSON data named studentAcademicDataJSON and detailedSessionCoachingDataJSON and answer questions stated in "OUTPUT FORMAT AND QUESTIONS TO BE ANSWERED" section.
studentAcademicDataJSON:
{studentAcademicDataStrForGpt}

detailedSessionCoachingDataJSON:
{detailedSessionCoachingDataStrForGpt}

2 - Answer questions presented in "OUTPUT FORMAT AND QUESTIONS TO BE ANSWERED" section after you complete step 1.

3 - While answering those questions, you must pay attention to the "parameters to take into consideration for answering those questions" section of each question, as your answers will be based on those parameters. Be descriptive yet proof-based as much as possible. Make sure to underline the thresholds stated in the "parameters to take into consideration for answering those questions" section of each question. You must use data in JSONs for providing proof. Don't forget to use the numbers, statistics, dates, percentages stated in JSONs. I don't want you to use general expressions like "such as". I want you to provide evidence that you find inside JSONs. This is a must-have requirement.

RULES:

- Always refer to the exact numbers (stats, numbers, statistics, dates, percentages) you find in the JSONs and comparison with thresholds stated in "Parameters to take into consideration for answering this question:" section while providing answer for "add evidence and reasoning" section of each question. I don't want you to use general expressions like "such as". I want you to provide evidence that you find inside JSONs. This is a must-have requirement.
- You must select only one of the options when you see "(select one)" statement.
- Each of your answers must contain stats, numbers, statistics, dates, percentages that drive you to reach that decision. I don't want you to use general expressions, but detailed and data-oriented answers (stats, numbers, statistics, dates, percentages) and comparison with thresholds stated in "Parameters to take into consideration for answering this question:" section of each question are expected. I don't want you to use general expressions like "such as". I want you to provide evidence that you find inside JSONs.


OUTPUT FORMAT AND QUESTIONS TO BE ANSWERED:

PART - I

Q1 - Most important problem
Parameters to take into consideration for answering this question:
- After you perform your analysis, summarize your findings in a way that clearly explains the causes of the problem statement
Q2 - What Alpha (the school) needs to do
Parameters to take into consideration for answering this question:
- Clearly state what needs to be done and by whom to fix the problem and help the student learn more effectively
Q3 - What is the message for the student
Parameters to take into consideration for answering this question:
- Write what the student should do in a way that is clear and easy for the student to understand. If the student follows this recommendation, they should learn more effectively

PART - II

Q4 - Is the student progressing optimally?
Parameters to take into consideration for answering this question:
- Suboptimal progress is indicated by mastering less than 70% of the lesson target in the period. Struggling students often have low accuracy, which is below 80%.
The structure of the answer (Make sure to fill all sections of the structure of the answer):
- Yes / No (select one, no other choice)
- Exact evidence and reasoning
- Describe the steps you have taken to arrive at the conclusion stated above. List and link all the sources consulted, as well as any challenges in finding relevant information.
Q5 - Is the student working at the right level?
Parameters to take into consideration for answering this question:
- Students should be bracketed by two standardized tests - one mastered (90%+ score) and one unmastered (score below 90%) in the grade immediately above.
The structure of the answer (Make sure to fill all sections of the structure of the answer):
- Yes / No (select one, no other choice)
- Exact evidence and reasoning
- Describe the steps you have taken to arrive at the conclusion stated above. List and link all the sources consulted, as well as any challenges in finding relevant information.
Q6 - Is the student behaving as a '2hr learner'?
Parameters to take into consideration for answering this question:
- At least 25 minutes/school day dedicated to learning,
- Strived to meet their 'lessons mastered' targets without squandering their learning time.
- Used the apps correctly and avoided learning anti-patterns
The structure of the answer (Make sure to fill all sections of the structure of the answer):
- Yes / No-not putting enough time / No-30% waste / No-50% waste / No-90% waste (select one, no other choice)
- Exact evidence and reasoning
- Describe the steps you have taken to arrive at the conclusion stated above. List and link all the sources consulted, as well as any challenges in finding relevant information.
Q7 - What are the reasons for the lack of progress?
Parameters to take into consideration for answering this question:
- Struggling students consistently have low accuracy (below 80%) or take multiple sessions to master skills. This could be a consequence of anti-patterns, insufficient support, subpar skill plans, or knowledge gaps.
The structure of the answer (Make sure to fill all sections of the structure of the answer):
- None-optimal progress / Wrong level / Not enough time / Waste-Antipatterns / Lack of scaffolding-support / Unable to determine (select one, no other choice)
- Exact evidence and reasoning (include the names and numbers of antipatterns if you detect any)
- Describe the steps you have taken to arrive at the conclusion stated above. List and link all the sources consulted, as well as any challenges in finding relevant information.
Q8 - Write Python code snippets to create distinct charts for Q4, Q5, Q6, and Q7. Each snippet should be tailored to illustrate the findings related to its respective question clearly and convincingly. Utilize the data provided in the 'Exact evidence and reasoning' sections for each question as the basis for your charts. Input the data values directly into the scripts without using dataframes, and ensure these values are exact, not hypothetical. The objective is to make the outcomes from each question's findings visually unmistakable through these charts. Additionally, to prevent the common 'ValueError: shape mismatch' issue, particularly when shapes between arrays don't align (e.g., one array has shape (15,) and another has shape (14,)), carefully verify that the data arrays or lists you use for plotting have matching dimensions. This check is crucial for ensuring that your scripts execute smoothly and the visual representations are clear and effective.
"""

# Conversation for the model: a system persona followed by the analysis prompt.
messages = [
    dict(
        role="system",
        content="You are an extremely experienced data analyst with 30 years of education industry experience.",
    ),
    dict(role="user", content=prompt),
]

# Single chat-completion request; the reply is post-processed further below.
gptResponse = client.chat.completions.create(
    model="gpt-4-turbo-preview",
    messages=messages,
    temperature=0.2,
    max_tokens=4096,
)
#endregion

#region DATA CLEANING AND FORMATTING
# Extract the reply text. The content field may be None (for example when the
# model returns no text), so fall back to an empty string to keep the string
# processing below from failing.
gptOutputMessage = gptResponse.choices[0].message.content or ""

# Pull the fenced Python chart snippets out of the full reply.
codeForVisuals = ExtractCodeSnippets(gptOutputMessage)

# Cut the reply at the last occurrence of "Q8" so that only the written
# answers are printed. Slicing up to that index already excludes the "Q8"
# marker itself; when "Q8" is absent the reply is kept unchanged.
lastQ8Index = gptOutputMessage.rfind("Q8")
modifiedMessage = gptOutputMessage[:lastQ8Index] if lastQ8Index != -1 else gptOutputMessage
#endregion

#region FINAL EXECUTION AND OUTPUT
# Print the written analysis (the reply text up to the last "Q8" marker).
print(modifiedMessage)
# Execute the chart snippets extracted from the reply so their plots are drawn.
ExecuteAndDisplayPlots(codeForVisuals)
#endregion
