I employed a different model to generate summaries of the answers previously obtained. These summaries were then added to a new column labeled "Summary" within the same Excel file.

In [15]:
import pandas as pd
import os
import requests
from dotenv import load_dotenv
import csv

# Read the OpenAI API key from the environment rather than hard-coding it in
# the notebook. load_dotenv() is called first so that a key kept in a local
# .env file is picked up as well. api_key is None when the variable is unset;
# the request helpers check for that before sending anything.
load_dotenv()
api_key = os.getenv("OPENAI_API_KEY")

def ask_gpt(question, data_context, model="gpt-4-0125-preview"):
    """Ask an OpenAI chat model a question about a block of data.

    Args:
        question: The question to put to the model.
        data_context: Text form of the data; it is placed ahead of the
            question in the user message.
        model: Name of the chat-completions model to request.

    Returns:
        The message content of the first choice when the API answers with
        HTTP 200; otherwise the string
        ``"Error: Received status code <code>"``.

    Raises:
        RuntimeError: If the module-level ``api_key`` is empty or ``None``.
    """
    if not api_key:
        raise RuntimeError("OpenAI API key is not set in environment variables.")

    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "Please answer the following question based on the data provided:"},
            {"role": "user", "content": f"{data_context}\n\n{question}"},
        ],
        # Must be a quoted key: `temperature=0.1` is not valid inside a dict
        # literal and prevents the whole module from being parsed.
        "temperature": 0.1,
    }
    response = requests.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        return response.json()['choices'][0]['message']['content']
    # Non-200 responses are reported as a string, not raised, so a caller
    # looping over many questions keeps going.
    return f"Error: Received status code {response.status_code}"

def summarize_with_gpt(text, model="gpt-4-0125-preview"):
    """Ask an OpenAI chat model to summarize a piece of text.

    Args:
        text: The text to summarize; it is sent in the user message after
            the prefix ``"Summarize this: "``.
        model: Name of the chat-completions model to request.

    Returns:
        The message content of the first choice when the API answers with
        HTTP 200; otherwise the string
        ``"Error: Received status code <code>"``.

    Raises:
        RuntimeError: If the module-level ``api_key`` is empty or ``None``.
    """
    if not api_key:
        raise RuntimeError("OpenAI API key is not set in environment variables.")

    url = "https://api.openai.com/v1/chat/completions"
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {api_key}",
    }
    payload = {
        "model": model,
        "messages": [
            {"role": "system", "content": "Please provide a summary for the following text:"},
            {"role": "user", "content": f"Summarize this: {text}"},
        ],
        # Must be a quoted key: `temperature=0.3` is not valid inside a dict
        # literal and prevents the whole module from being parsed.
        "temperature": 0.3,
    }
    response = requests.post(url, json=payload, headers=headers)
    if response.status_code == 200:
        return response.json()['choices'][0]['message']['content']
    # Non-200 responses are reported as a string, not raised.
    return f"Error: Received status code {response.status_code}"


def get_questions_from_csv(file_path):
    """Collect every CSV cell that ends with a question mark.

    All cells of all rows are inspected, in reading order. Each cell is
    stripped of surrounding whitespace before the check, and the stripped
    text is what gets returned.

    Args:
        file_path: Path of the CSV file to scan.

    Returns:
        A list of the stripped cell texts ending in ``'?'``; empty when no
        cell qualifies.
    """
    with open(file_path, 'r', newline='') as handle:
        stripped_cells = (
            field.strip()
            for record in csv.reader(handle)
            for field in record
        )
        # The list is built while the file is still open, because the
        # generator above reads from it lazily.
        return [text for text in stripped_cells if text.endswith('?')]


def main():
    """Answer every question about the dataset and save the results.

    Reads the dataset from ``industry.csv`` and renders it as text, reads
    the questions from ``questions_list.csv``, asks the model each question
    (printing every question/answer pair), then requests one summary of all
    answers joined by spaces. The pairs plus a final "Overall Summary" row
    are written to ``ques_ans_pairs.csv`` in the working directory.
    """
    data = pd.read_csv("industry.csv")  # Change to your dataset path
    data_context = data.to_string()

    questions_file_path = "questions_list.csv"  # Change to your questions file path
    questions = get_questions_from_csv(questions_file_path)

    qa_pairs = []
    for question in questions:
        answer = ask_gpt(question, data_context)
        print(f"Q: {question}\nA: {answer}\n")
        qa_pairs.append((question, answer))

    qa_df = pd.DataFrame(qa_pairs, columns=['Question', 'Answer'])
    all_answers = ' '.join(qa_df['Answer'])
    overall_summary = summarize_with_gpt(all_answers)

    # The summary is stored as one extra row so it travels in the same file
    # as the individual answers.
    summary_df = pd.DataFrame([["Overall Summary", overall_summary]], columns=['Question', 'Answer'])
    qa_df = pd.concat([qa_df, summary_df], ignore_index=True)

    # Relative path, like the two input files. The previous value
    # '/ues_ans_pairs.csv' pointed at the filesystem root.
    qa_df.to_csv('ques_ans_pairs.csv', index=False)  # Change to your output file path

# Run the pipeline only when this file is executed directly, not on import.
if __name__ == "__main__":
    main()

Q: What is the average attendance percentage across all modules?
A: To find the average attendance percentage across all modules, we first sum all the attendance percentages and then divide by the total number of modules (students in this case). Let's calculate:

- Ethan Carter: 50%
- Olivia Bennett: 40%
- Mason Brooks: 80%
- Ava Turner: 70%
- Noah Griffin: 20%
- Emily Ford: 45%
- Jacob Morris: 86%
- Lily Hughes: 89%
- Lucas Murphy: 100%
- Sophia Perry: 49%

Total Attendance Percentage = 50% + 40% + 80% + 70% + 20% + 45% + 86% + 89% + 100% + 49% = 629%

Number of Modules (or students) = 10

Average Attendance Percentage = (Total Attendance Percentage) / (Number of Modules) = 629% / 10 = 62.9%

Thus, the average attendance percentage across all modules is 62.9%.

Q: Which student has the highest attendance percentage?
A: Lucas Murphy has the highest attendance percentage with 100%.

Q: Are there any modules with particularly high or low attendance rates?
A: Yes, there are:

- **High Att