In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import langchain
import langchain_google_genai

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
import fitz 

In [8]:
def extract_text_from_pdf(pdf_path):
    doc = fitz.open(pdf_path)
    text = ""
    for page in doc:
        text += page.get_text()
    return text

# Example usage
pdf_path = input("Enter file path:")
extracted_text = extract_text_from_pdf(pdf_path)
print(extracted_text)

Enter file path: ./Data/3rd sem.pdf


                 VTU PROVISIONAL RESULTS OF UG / PG December-2023/January-2024 EXAMINATION.                    
USN
1BI22CD015
Student Name
HARSHA K L
                Semester : 3               
Subject Code
Subject Title
Internal Marks
External Marks
Total Marks
Subject Result
BCS301
MATHEMATICS FOR COMPUTER
SCIENCE
50
45
95
P
BCS302
DIGITAL DESIGN & COMPUTER
ORGANIZATION
50
43
93
P
BCS303
OPERATING SYSTEMS
50
30
80
P
BCS304
DATA STRUCTURES AND
APPLICATIONS
50
32
82
P
BCSL305
DATA STRUCTURES LAB
50
50
100
P
BSCK307
SOCIAL CONNECT AND
RESPONSIBILITY
100
0
100
P
BPEK359
PHYSICAL EDUCATION
95
0
95
P
BCS358A
DATA ANALYTICS WITH EXCEL
50
50
100
P
BCS306A
OBJECT ORIENTED
PROGRAMMING WITH JAVA
50
34
84
P
Nomenclature / Abbreviations
P -> PASS
F -> FAIL
A -> ABSENT
W -> WITHHELD
X, NE -> NOT ELIGIBLE
Note :
1) Results of some subjects of some students are not appearing due to reasons such as,
  a) CIE not Available
  b) SEE not available
  because of technical reasons etc, however they will b

In [9]:
#Check student name

print("1BI22CD015" in extracted_text)

True


In [11]:
import re

match = re.search(r"Semester\s*:\s*(\d+)", extracted_text)
if match:
    semester = match.group(1)
    print("Semester:", semester)

Semester: 3


In [5]:
import re

def extract_result_data(text):
    # This regex handles both single-line and multi-line subject name formats
    pattern = re.compile(
        r'(?P<code>B\w{2,10})\n'                                      # Subject code
        r'(?P<name>(?:[A-Z][^\n]*\n?)+?)'                              # Subject name (1 or more lines)
        r'(?P<internal>\d{1,3})\n'                                     # Internal marks
        r'(?P<external>\d{1,3})\n'                                     # External marks
        r'(?P<total>\d{1,3})\n'                                        # Total
        r'(?P<result>[PFWAX])',                                       # Result
        re.MULTILINE
    )

    results = []
    for match in pattern.finditer(text):
        subject_data = {
            'subject_code': match.group('code').strip(),
            'subject_name': match.group('name').replace('\n', ' ').strip(),
            'internal': int(match.group('internal')),
            'external': int(match.group('external')),
            'total': int(match.group('total')),
            'result': match.group('result')
        }
        results.append(subject_data)

    return results

extracted_data = extract_result_data(extracted_text)
print(extracted_data)

[{'subject_code': 'BMATS101', 'subject_name': 'MATHEMATICS FOR CSE STREAM-I', 'internal': 48, 'external': 50, 'total': 98, 'result': 'P'}, {'subject_code': 'BPHYS102', 'subject_name': 'PHYSICS FOR CSE STREAM', 'internal': 50, 'external': 50, 'total': 100, 'result': 'P'}, {'subject_code': 'BPOPS103', 'subject_name': 'PRINCIPLES OF PROGRAMMING USING C', 'internal': 50, 'external': 39, 'total': 89, 'result': 'P'}, {'subject_code': 'BENGK106', 'subject_name': 'COMMUNICATIVE ENGLISH', 'internal': 45, 'external': 37, 'total': 82, 'result': 'P'}, {'subject_code': 'BICOK107', 'subject_name': 'INDIAN CONSTITUTION', 'internal': 41, 'external': 27, 'total': 68, 'result': 'P'}, {'subject_code': 'BIDTK158', 'subject_name': 'INNOVATION AND DESIGN THINKING', 'internal': 48, 'external': 28, 'total': 76, 'result': 'P'}, {'subject_code': 'BESCK104B', 'subject_name': 'INTRODUCTION TO ELECTRICAL ENGINEERING', 'internal': 50, 'external': 43, 'total': 93, 'result': 'P'}, {'subject_code': 'BETCK105H', 'subje

In [6]:
len(extracted_data)

8

In [7]:
def extract_data_from_pdf(pdf_path):
    result = extract_text_from_pdf(pdf_path)
    result = extract_result_data(result)
    return result

In [8]:
import os

student_data = {"name":"Harsha K L", "overall_results" : []}
folder_path = r".\Data"
i = 1
for filename in os.listdir(folder_path):
    if filename.endswith(".pdf"):
        file_path = os.path.join(folder_path, filename)
        student_data["overall_results"].append({"sem":i, "results":extract_data_from_pdf(file_path)})
        i+=1

In [9]:
for sem in student_data["overall_results"]:
    print(f"Sem: {sem['sem']} : {len(sem['results'])}")

Sem: 1 : 8
Sem: 2 : 8
Sem: 3 : 9
Sem: 4 : 9
Sem: 5 : 9


## -> go to persistent chatbot

In [33]:
prompt_template = """
You are an academic advisor assistant. Your task is to analyze a student's final year exam results and attendance and generate a personalized academic feedback. The feedback should highlight:

1. Overall academic performance (based on total marks in subjects).
2. Subject-wise observations (strengths, weaknesses).
3. Remarks on attendance (if it is below 75%, point it out).
4. Motivation and improvement suggestions.

Format the feedback in simple, encouraging, and constructive language suitable for an engineering student.

Here is the student's data:
{student_info}

Generate the feedback in under 200 words.
Use only the data provided, dont include external data
"""

print("Prompt Template Ready")

Prompt Template Ready


In [15]:
student_data = {
    "name": "Pavan D",
    "sem": "5",
    "attendance": "72%",
    "result": extracted_data
}

student_info = f"{student_data['name']} is in semester {student_data['sem']}, attendance is {student_data['attendance']}, and their results are: {student_data['result']}."

## LANGCHAIN MODEL

In [1]:
import os
from dotenv import load_dotenv

load_dotenv()
api_key = os.getenv("GEMINI_API_KEY")

In [11]:
import os
os.environ["GOOGLE_API_KEY"] = api_key

In [12]:
import google.generativeai as genai
genai.configure(api_key=api_key)

In [13]:
for model in genai.list_models():
    print(model.name, model.supported_generation_methods)

models/chat-bison-001 ['generateMessage', 'countMessageTokens']
models/text-bison-001 ['generateText', 'countTextTokens', 'createTunedTextModel']
models/embedding-gecko-001 ['embedText', 'countTextTokens']
models/gemini-1.0-pro-vision-latest ['generateContent', 'countTokens']
models/gemini-pro-vision ['generateContent', 'countTokens']
models/gemini-1.5-pro-latest ['generateContent', 'countTokens']
models/gemini-1.5-pro-001 ['generateContent', 'countTokens', 'createCachedContent']
models/gemini-1.5-pro-002 ['generateContent', 'countTokens', 'createCachedContent']
models/gemini-1.5-pro ['generateContent', 'countTokens']
models/gemini-1.5-flash-latest ['generateContent', 'countTokens']
models/gemini-1.5-flash-001 ['generateContent', 'countTokens', 'createCachedContent']
models/gemini-1.5-flash-001-tuning ['generateContent', 'countTokens', 'createTunedModel']
models/gemini-1.5-flash ['generateContent', 'countTokens']
models/gemini-1.5-flash-002 ['generateContent', 'countTokens', 'createCac

In [19]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.schema import SystemMessage, HumanMessage
from langchain.chat_models import ChatOpenAI
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain
from langchain.memory import ConversationBufferMemory

In [20]:
llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-002", temperature=0, max_output_tokens = 200)

prompt = PromptTemplate(
    input_variables=[student_info],
    template=prompt_template
)

chain = LLMChain(llm=llm, prompt=prompt)

In [13]:
response = chain.run(student_info)
print(response)

  response = chain.run(student_info)


Pavan D, your semester 5 results demonstrate a solid overall performance. You've achieved a commendable average across your subjects, showcasing your dedication.  Your performance in Data Visualization Lab and Mini Project is particularly impressive, achieving near-perfect scores. This highlights your practical skills and project management abilities.  You also demonstrated strength in Software Engineering and Theory of Computation.

While you passed all subjects, there's room for improvement in Computer Networks, Research Methodology and IPR, Environmental Studies, and Data Warehousing. Focusing on strengthening your understanding of the core concepts in these areas would be beneficial.

Your 72% attendance is slightly below the recommended 75%.  Regular attendance is crucial for grasping concepts effectively.  Improving your attendance next semester will positively impact your learning and overall performance.

Keep up the good work, Pavan! By focusing on the areas mentioned and main

In [21]:
type(chain.memory)

NoneType

In [23]:
from  langchain.memory import ConversationBufferWindowMemory
from langchain.chains import ConversationChain

memory = ConversationBufferWindowMemory(k=10)

convo = ConversationChain(
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro-002", temperature=0.5),
    memory = memory
)

In [24]:
convo.run("Who is Prime Minister of India")

'The current Prime Minister of India is Narendra Modi. He assumed office on May 26, 2014, after the Bharatiya Janata Party (BJP) won the general election.  This is his second term as Prime Minister.  His first term began in 2014 and ended in 2019. He was re-elected in the 2019 general election.  Modi previously served as the Chief Minister of Gujarat from 2001 to 2014.'

In [25]:
convo.run("What is sin(60 deg)")

'The sine of 60 degrees is √3/2, or approximately 0.866.'

In [26]:
convo.run("What is his current age?")

"Narendra Modi's current age is 73. He was born on September 17, 1950."

In [27]:
print(convo.memory.buffer)

Human: Who is Prime Minister of India
AI: The current Prime Minister of India is Narendra Modi. He assumed office on May 26, 2014, after the Bharatiya Janata Party (BJP) won the general election.  This is his second term as Prime Minister.  His first term began in 2014 and ended in 2019. He was re-elected in the 2019 general election.  Modi previously served as the Chief Minister of Gujarat from 2001 to 2014.
Human: What is sin(60 deg)
AI: The sine of 60 degrees is √3/2, or approximately 0.866.
Human: What is his current age?
AI: Narendra Modi's current age is 73. He was born on September 17, 1950.


## Persistent Chatbot

In [29]:
from langchain.memory import ConversationBufferMemory
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Sample student data loaded at session start
student_context = {
    "name": "Harsha",
    "sem": "5",
    "attendance": "72%",
    "result_data": extracted_data
}

# Define memory and inject student context into it
memory = ConversationBufferMemory(
    memory_key="chat_history",
    return_messages=True,
    input_key="query"
)

# Inject student data at start
initial_context = f"""
Student Name: {student_data['name']}
Overall Result : {student_data['overall_results']}
"""
memory.chat_memory.add_user_message("Student data for chatbot context:")
memory.chat_memory.add_ai_message(initial_context)

# Define prompt to include chat history
template = """
You are a student assistant bot. Use the following conversation and context to answer questions.

{chat_history}

Student's query: {query}
"""

prompt = PromptTemplate(
    input_variables=["chat_history", "query"],
    template=template
)

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", temperature=0.5)
chain = LLMChain(llm=llm, prompt=prompt, memory=memory)

In [17]:
response = chain.run("Calculate my overall percentage considering 1 as credit for all subjects")

In [16]:
chain.run("How is my academic graph over the semesters?")

"Harsha K L's academic performance, based on the simple average of total marks (assuming equal credit for all subjects), shows a generally consistent performance with a slight dip in semester 3 followed by an improvement. Here's a semester-wise breakdown:\n\n* **Semester 1:**  716/800  (approximately 89.5%)\n* **Semester 2:**  726/800  (approximately 90.75%)\n* **Semester 3:**  774/900  (approximately 86%)\n* **Semester 4:**  786/900  (approximately 87.33%)\n* **Semester 5:**  775/900  (approximately 86.11%)\n\nWhile the overall trend is positive, it's worth noting the slight decline in performance from semester 2 to semester 3 and 5.  However, without knowing the difficulty or weighting of the courses in each semester, it's hard to draw definitive conclusions about the reasons for these fluctuations.  More detailed analysis would require considering the individual subject performances and their respective credit hours."

In [53]:
chain.run("Which are my best scoring subjects?")

'Based on the provided data, Harsha K L\'s best scoring subjects are those where a total score of 100 was achieved.  These are:\n\n* **Semester 1:** PHYSICS FOR CSE STREAM\n* **Semester 3:** DATA STRUCTURES LAB, SOCIAL CONNECT AND RESPONSIBILITY, DATA ANALYTICS WITH EXCEL\n* **Semester 4:** ANALYSIS & DESIGN OF ALGORITHMS LAB, DISCRETE MATHEMATICAL STRUCTURES, MERN\n* **Semester 5:** DATA VISUALIZATION LAB, MINI PROJECT, PHYSICAL EDUCATION\n\n\nIt\'s important to note that some of these perfect scores (like SOCIAL CONNECT AND RESPONSIBILITY, PHYSICAL EDUCATION, and MINI PROJECT) might be due to the nature of the assessment or grading in those particular subjects.  Looking at subjects with high scores achieved through external examinations might provide a different perspective on "best performance."  For example, MATHEMATICS FOR CSE STREAM-I in Semester 1 (98 total) and COMPUTER-AIDED ENGINEERING DRAWING in Semester 2 (98 total) represent strong performances.'

In [54]:
chain.run("Provide me a overall feedback of my academic")

"Harsha K L has demonstrated consistent academic performance across five semesters, maintaining a representative average score of approximately 89.33%.  While there's a slight downward trend in average marks per semester from semester 1 to 5, the variation is minimal, and the overall performance remains commendable.  Harsha has achieved perfect scores in several subjects, particularly in lab-based courses, project work, and subjects like Physical Education and Social Connect and Responsibility.  However, strong performance is also evident in subjects like Mathematics and Computer-Aided Engineering Drawing, where high marks were earned through external examinations.  To improve the academic graph, focusing on consistent effort across all subjects and analyzing the reasons for minor dips in performance in some semesters could be beneficial.  Overall, Harsha's academic record is positive and suggests a good understanding of the coursework."

In [57]:
chain.run("give me code to plot my overall graph of my academics")

"```python\nimport matplotlib.pyplot as plt\nimport pandas as pd\n\n# Option 1: Average Total Marks per Semester\ndata1 = {'Semester': [1, 2, 3, 4, 5],\n         'Average Total Marks': [89.25, 91.75, 91.22, 93.11, 89.56]}\ndf1 = pd.DataFrame(data1)\n\nplt.figure(figsize=(8, 6))  # Adjust figure size for better visualization\nplt.plot(df1['Semester'], df1['Average Total Marks'], marker='o', linestyle='-')\nplt.title('Average Total Marks per Semester')\nplt.xlabel('Semester')\nplt.ylabel('Average Total Marks')\nplt.xticks(df1['Semester']) # Ensure x-axis ticks are integers\nplt.grid(True) # Add a grid for better readability\nplt.show()\n\n\n\n# Option 2: Total Marks Obtained per Semester\ndata2 = {'Semester': [1, 2, 3, 4, 5],\n         'Total Marks Obtained': [714, 734, 821, 838, 716]}\ndf2 = pd.DataFrame(data2)\n\nplt.figure(figsize=(8, 6))\nplt.plot(df2['Semester'], df2['Total Marks Obtained'], marker='o', linestyle='-')\nplt.title('Total Marks Obtained per Semester')\nplt.xlabel('Seme

In [58]:
chain.run("Can you generate the graph?")

"I can't directly generate and display images within this text-based interface.  I provided Python code in a previous response that uses `matplotlib` and `seaborn` to create the graphs. You need to execute that code in a Python environment (like Jupyter Notebook, Google Colab, or a local Python installation) to generate the actual graph images.\n\nHere are the basic steps again:\n\n1. **Install Libraries:** `pip install matplotlib pandas seaborn`\n2. **Copy the Code:** Copy the Python code from my previous response.\n3. **Replace Sample Data (Option 3):** If you're using Option 3 (subject-wise graph), replace the sample data with your actual data.\n4. **Run the Code:** Save the code as a Python file (e.g., `plot_grades.py`) and run it: `python plot_grades.py`\n\nThe graphs will then be displayed in a separate window or within your Python environment."

In [18]:
response

"Harsha K L's overall simple average across all semesters is approximately 92.12%, assuming all subjects have equal weight (1 credit).  This isn't a true weighted average, as it assumes all courses have the same credit hours. To calculate a true weighted average, you would need to provide the actual credit hours for each course."

In [19]:
# Format function (setup once)
def format_response(text):
    text = text.strip()
    text = text.replace('. ', '.\n')
    text = text.replace('- ', '• ')
    return text

print(format_response(response))

Harsha K L's overall simple average across all semesters is approximately 92.12%, assuming all subjects have equal weight (1 credit).
 This isn't a true weighted average, as it assumes all courses have the same credit hours.
To calculate a true weighted average, you would need to provide the actual credit hours for each course.


In [16]:
type(chain)

langchain.chains.llm.LLMChain

In [21]:
student_data

{'name': 'Harsha K L',
 'overall_results': [{'sem': 1,
   'results': [{'subject_code': 'BMATS101',
     'subject_name': 'MATHEMATICS FOR CSE STREAM-I',
     'internal': 48,
     'external': 50,
     'total': 98,
     'result': 'P'},
    {'subject_code': 'BPHYS102',
     'subject_name': 'PHYSICS FOR CSE STREAM',
     'internal': 50,
     'external': 50,
     'total': 100,
     'result': 'P'},
    {'subject_code': 'BPOPS103',
     'subject_name': 'PRINCIPLES OF PROGRAMMING USING C',
     'internal': 50,
     'external': 39,
     'total': 89,
     'result': 'P'},
    {'subject_code': 'BENGK106',
     'subject_name': 'COMMUNICATIVE ENGLISH',
     'internal': 45,
     'external': 37,
     'total': 82,
     'result': 'P'},
    {'subject_code': 'BICOK107',
     'subject_name': 'INDIAN CONSTITUTION',
     'internal': 41,
     'external': 27,
     'total': 68,
     'result': 'P'},
    {'subject_code': 'BIDTK158',
     'subject_name': 'INNOVATION AND DESIGN THINKING',
     'internal': 48,
     '

In [23]:
data = str(student_data)
data

"{'name': 'Harsha K L', 'overall_results': [{'sem': 1, 'results': [{'subject_code': 'BMATS101', 'subject_name': 'MATHEMATICS FOR CSE STREAM-I', 'internal': 48, 'external': 50, 'total': 98, 'result': 'P'}, {'subject_code': 'BPHYS102', 'subject_name': 'PHYSICS FOR CSE STREAM', 'internal': 50, 'external': 50, 'total': 100, 'result': 'P'}, {'subject_code': 'BPOPS103', 'subject_name': 'PRINCIPLES OF PROGRAMMING USING C', 'internal': 50, 'external': 39, 'total': 89, 'result': 'P'}, {'subject_code': 'BENGK106', 'subject_name': 'COMMUNICATIVE ENGLISH', 'internal': 45, 'external': 37, 'total': 82, 'result': 'P'}, {'subject_code': 'BICOK107', 'subject_name': 'INDIAN CONSTITUTION', 'internal': 41, 'external': 27, 'total': 68, 'result': 'P'}, {'subject_code': 'BIDTK158', 'subject_name': 'INNOVATION AND DESIGN THINKING', 'internal': 48, 'external': 28, 'total': 76, 'result': 'P'}, {'subject_code': 'BESCK104B', 'subject_name': 'INTRODUCTION TO ELECTRICAL ENGINEERING', 'internal': 50, 'external': 43,

In [24]:
data = data.replace('\'', '\"')
data

'{"name": "Harsha K L", "overall_results": [{"sem": 1, "results": [{"subject_code": "BMATS101", "subject_name": "MATHEMATICS FOR CSE STREAM-I", "internal": 48, "external": 50, "total": 98, "result": "P"}, {"subject_code": "BPHYS102", "subject_name": "PHYSICS FOR CSE STREAM", "internal": 50, "external": 50, "total": 100, "result": "P"}, {"subject_code": "BPOPS103", "subject_name": "PRINCIPLES OF PROGRAMMING USING C", "internal": 50, "external": 39, "total": 89, "result": "P"}, {"subject_code": "BENGK106", "subject_name": "COMMUNICATIVE ENGLISH", "internal": 45, "external": 37, "total": 82, "result": "P"}, {"subject_code": "BICOK107", "subject_name": "INDIAN CONSTITUTION", "internal": 41, "external": 27, "total": 68, "result": "P"}, {"subject_code": "BIDTK158", "subject_name": "INNOVATION AND DESIGN THINKING", "internal": 48, "external": 28, "total": 76, "result": "P"}, {"subject_code": "BESCK104B", "subject_name": "INTRODUCTION TO ELECTRICAL ENGINEERING", "internal": 50, "external": 43,

In [25]:
import ast

In [26]:
data = ast.literal_eval(data)
data

{'name': 'Harsha K L',
 'overall_results': [{'sem': 1,
   'results': [{'subject_code': 'BMATS101',
     'subject_name': 'MATHEMATICS FOR CSE STREAM-I',
     'internal': 48,
     'external': 50,
     'total': 98,
     'result': 'P'},
    {'subject_code': 'BPHYS102',
     'subject_name': 'PHYSICS FOR CSE STREAM',
     'internal': 50,
     'external': 50,
     'total': 100,
     'result': 'P'},
    {'subject_code': 'BPOPS103',
     'subject_name': 'PRINCIPLES OF PROGRAMMING USING C',
     'internal': 50,
     'external': 39,
     'total': 89,
     'result': 'P'},
    {'subject_code': 'BENGK106',
     'subject_name': 'COMMUNICATIVE ENGLISH',
     'internal': 45,
     'external': 37,
     'total': 82,
     'result': 'P'},
    {'subject_code': 'BICOK107',
     'subject_name': 'INDIAN CONSTITUTION',
     'internal': 41,
     'external': 27,
     'total': 68,
     'result': 'P'},
    {'subject_code': 'BIDTK158',
     'subject_name': 'INNOVATION AND DESIGN THINKING',
     'internal': 48,
     '

In [33]:
print(os.environ["GOOGLE_APPLICATION_CREDENTIALS"])

KeyError: 'GOOGLE_APPLICATION_CREDENTIALS'

In [1]:
chain.memory.buffer

NameError: name 'chain' is not defined

In [14]:
import requests
response = requests.get("http://localhost:3000/api/PythonScripts/studentResultsById/1BI22CD015")

In [17]:
response.json()['results']

{'name': 'Harsha K L',
 'overall_results': [{'semester': 1,
   'results': [{'subject_code': 'BMATS101',
     'subject_name': 'MATHEMATICS FOR CSE STREAM-I',
     'internal': 48,
     'external': 50,
     'total': 98,
     'result': 'P'},
    {'subject_code': 'BPHYS102',
     'subject_name': 'PHYSICS FOR CSE STREAM',
     'internal': 50,
     'external': 50,
     'total': 100,
     'result': 'P'},
    {'subject_code': 'BPOPS103',
     'subject_name': 'PRINCIPLES OF PROGRAMMING USING C',
     'internal': 50,
     'external': 39,
     'total': 89,
     'result': 'P'},
    {'subject_code': 'BENGK106',
     'subject_name': 'COMMUNICATIVE ENGLISH',
     'internal': 45,
     'external': 37,
     'total': 82,
     'result': 'P'},
    {'subject_code': 'BICOK107',
     'subject_name': 'INDIAN CONSTITUTION',
     'internal': 41,
     'external': 27,
     'total': 68,
     'result': 'P'},
    {'subject_code': 'BIDTK158',
     'subject_name': 'INNOVATION AND DESIGN THINKING',
     'internal': 48,
 