In [21]:
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
import numpy as np
from itertools import product
from transformers import pipeline
import json

# Sample JSON data
data = [
    {
        "student_id": 1,
        "age": 2,
        "sex": 2,
        "graduated_h_school_type": 1,
        "scholarship_type": 3,
        "additional_work": 2,
        "regular_activities": 1,
        "partner": 1,
        "questions_solved": 50,
        "marks_gained": 45,
        "avg_time_per_question": 2,
        "best_time_per_question": 1,
        "worst_time_per_question": 4,
        "subject_area": "Mathematics",
        "study_hours": 3,
        "study_method": 2,
        "reading_non_scientific": 2,
        "reading_scientific": 2,
        "attendance_seminars_dep": 1,
        "impact_of_projects": 1,
        "attendances_classes": 1,
        "preparation_midterm_company": 1,
        "preparation_midterm_time": 2,
        "taking_notes": 2,
        "listening": 2,
        "discussion_improves_interest": 2,
        "flip_classrom": 2,
        "grade_previous": 4,
        "grade_expected": 4,
        "biggest_challenge": "Time management during quizzes"
    }
]

# Convert data to DataFrame
df = pd.DataFrame(data)

In [22]:
# Define features and target
features = ["study_hours", "reading_non_scientific", "reading_scientific", "attendances_classes", "preparation_midterm_time", "discussion_improves_interest"]
target = "marks_gained"

# Preprocessing pipeline
preprocessor = ColumnTransformer(
    transformers=[
        ("num", "passthrough", features)
    ]
)

# Model pipeline
modelRF = Pipeline(steps=[
    ("preprocessor", preprocessor),
    ("regressor", RandomForestRegressor(n_estimators=100, random_state=42))
])

# Fit model
X = df[features]
y = df[target]
modelRF.fit(X, y)

# Generate counterfactuals
def generate_counterfactuals(student, attributes):
    keys, values = zip(*attributes.items())
    permutations = [dict(zip(keys, v)) for v in product(*values)]
    counterfactuals = []
    for perm in permutations:
        counterfactual = student.copy()
        counterfactual.update(perm)
        counterfactuals.append(counterfactual)
    return counterfactuals

# Attributes to manipulate
attributes = {
    "study_hours": [1, 2, 3, 4, 5],
    "reading_non_scientific": [1, 2, 3],
    "reading_scientific": [1, 2, 3],
    "attendances_classes": [1, 2, 3],
    "preparation_midterm_time": [1, 2, 3],
    "discussion_improves_interest": [1, 2, 3]
}

# Apply counterfactuals and evaluate
student = data[0]
counterfactuals = generate_counterfactuals(student, attributes)
results = []

for cf in counterfactuals:
    cf_df = pd.DataFrame([cf])
    impact_score = modelRF.predict(cf_df[features])[0]
    results.append((cf, impact_score))

# Select top N counterfactuals
N = 5
top_counterfactuals = sorted(results, key=lambda x: x[1], reverse=True)[:N]

# Generate insights from top counterfactuals
insights = []
for cf, score in top_counterfactuals:
    insights.append(f"""
    Changing the following attributes leads to a predicted score of {score}:
    - Study Hours: {cf['study_hours']}
    - Reading Non-Scientific: {cf['reading_non_scientific']}
    - Reading Scientific: {cf['reading_scientific']}
    - Class Attendance: {cf['attendances_classes']}
    - Midterm Preparation Time: {cf['preparation_midterm_time']}
    - Discussion Improves Interest: {cf['discussion_improves_interest']}
    """)

insights_summary = "\n".join(insights)

In [23]:
# Collect all relevant information
questionnaire_data = json.dumps(student, indent=4)
performance_summary = f"""
Total Questions Solved: {student['questions_solved']}
Marks Gained: {student['marks_gained']}
Average Time per Question: {student['avg_time_per_question']}
Best Time per Question: {student['best_time_per_question']}
Worst Time per Question: {student['worst_time_per_question']}
Subject Area: {student['subject_area']}
"""

# Function to evaluate recommendation
def evaluate_recommendation(recommendation):
    # Simulate evaluation by checking if specific practical advice is included
    keywords = ["increase", "improve", "adjust", "manage", "practice", "focus"]
    return any(keyword in recommendation.lower() for keyword in keywords)

In [3]:
!curl https://ollama.ai/install.sh | sh

  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                 Dload  Upload   Total   Spent    Left  Speed
  0     0    0     0    0     0      0      0 --:--:-- --:--:-- --:--:--     0>>> Downloading ollama...
100 10975    0 10975    0     0  24095      0 --:--:-- --:--:-- --:--:-- 24067
############################################################################################# 100.0%
>>> Installing ollama to /usr/local/bin...
>>> Creating ollama user...
>>> Adding ollama user to video group...
>>> Adding current user to ollama group...
>>> Creating ollama systemd service...
>>> The Ollama API is now available at 127.0.0.1:11434.
>>> Install complete. Run "ollama" from the command line.


In [18]:
!nohup ollama serve &

nohup: appending output to 'nohup.out'


In [13]:
!ollama run phi3:medium

[?25l⠙ [?25h[?25l[2K[1G⠹ [?25h[?25l[2K[1G⠸ [?25h[?25l[2K[1G⠼ [?25h[?25l[2K[1G⠴ [?25h[?25l[2K[1G⠦ [?25h[?25l[2K[1G⠧ [?25h[?25l[2K[1G⠇ [?25h[?25l[2K[1G⠏ [?25h[?25l[2K[1G⠋ [?25h[?25l[2K[1G⠙ [?25h[?25l[2K[1G⠹ [?25h[?25l[2K[1G⠸ [?25h[?25l[2K[1G⠼ [?25h[?25l[2K[1G⠴ [?25h[?25l[2K[1G⠦ [?25h[?25l[2K[1G⠧ [?25h[?25l[2K[1G⠇ [?25h[?25l[2K[1G⠏ [?25h[?25l[2K[1G⠋ [?25h[?25l[2K[1G⠙ [?25h[?25l[2K[1G⠹ [?25h[?25l[2K[1G⠸ [?25h[?25l[2K[1G⠼ [?25h[?25l[2K[1G⠴ [?25h[?25l[2K[1G⠦ [?25h[?25l[2K[1G⠧ [?25h[?25l[2K[1G⠇ [?25h[?25l[2K[1G⠏ [?25h[?25l[2K[1G⠋ [?25h[?25l[2K[1G⠙ [?25h[?25l[2K[1G⠹ [?25h[?25l[2K[1G⠸ [?25h[?25l[2K[1G⠼ [?25h[?25l[2K[1G⠴ [?25h[?25l[2K[1G⠦ [?25h[?25l[2K[1G⠧ [?25h[?25l[2K[1G⠇ [?25h[?25l[2K[1G⠏ [?25h[?25l[2K[1G⠋ [?25h[?25l[2K[1G⠙ [?25h[?25l[2K[1G⠹ [?25h[?25l[2K[1G⠸ [?25h[?25l[2K[1G⠼ [?25h[?25l[2K[1G⠴ [?25h[?25l[2K[1G⠦ [

In [15]:
!pip install ollama

Collecting ollama
  Downloading ollama-0.2.1-py3-none-any.whl (9.7 kB)
Collecting httpx<0.28.0,>=0.27.0 (from ollama)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m2.9 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx<0.28.0,>=0.27.0->ollama)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m10.6 MB/s[0m eta [36m0:00:00[0m
Collecting h11<0.15,>=0.13 (from httpcore==1.*->httpx<0.28.0,>=0.27.0->ollama)
  Downloading h11-0.14.0-py3-none-any.whl (58 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m58.3/58.3 kB[0m [31m10.3 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: h11, httpcore, httpx, ollama
Successfully installed h11-0.14.0 httpcore-1.0.5 httpx-0.27.0 ollama-0.2.1


In [19]:

response = ollama.chat(model='phi3:medium', messages=[
  {
    'role': 'user',
    'content': prompt_var,
  },
])
print(response['message']['content'])

 The color of the sky is due to a process called Rayleastern scattering. As sunlight reaches Earth's atmosphere, it interacts with gas molecules, dust particles, and other substances present in the air. While sunlight consists of different colors (or wavelengths) combined together, each color scatters differently when it encounters these atmospheric particles.

Blue light has a shorter wavelength compared to other visible colors like red or yellow, which makes it scatter more easily upon hitting gas molecules in the Earth's atmosphere. As blue light scatters throughout the sky from all directions, our eyes perceive that scattered blue light, making the sky appear blue during daylight hours.

This phenomenon is most pronounced when the Sun is at a high angle in the sky (around noon). When the sun is low on the horizon, like during sunrise or sunset, its light has to pass through more atmosphere before reaching us. During these times, red and orange wavelebands dominate because they scat

In [25]:
from transformers import TextStreamer
import ollama

# Generate and refine recommendations
for i in range(10):  # Maximum 10 iterations for refinement
    prompt_var = f"""
    Based on the following questionnaire data, performance summary, and insights, provide specific and practical recommendations for the student:

    Questionnaire Data:
    {questionnaire_data}

    Performance Summary:
    {performance_summary}

    Insights:
    {insights_summary}

    Additionally, the student has identified their biggest challenge with the NSMQ as: "{student['biggest_challenge']}". Provide targeted recommendations to address this challenge as well.
    """

    outputs = ollama.chat(model='phi3:medium', messages=[
      {
        'role': 'user',
        'content': prompt_var,
      },
    ])

    recommendation_text = response['message']['content']

    if evaluate_recommendation(recommendation_text):
        break  # If the recommendation is practical and effective, stop refining

print(recommendation_text)

 The color of the sky is due to a process called Rayleastern scattering. As sunlight reaches Earth's atmosphere, it interacts with gas molecules, dust particles, and other substances present in the air. While sunlight consists of different colors (or wavelengths) combined together, each color scatters differently when it encounters these atmospheric particles.

Blue light has a shorter wavelength compared to other visible colors like red or yellow, which makes it scatter more easily upon hitting gas molecules in the Earth's atmosphere. As blue light scatters throughout the sky from all directions, our eyes perceive that scattered blue light, making the sky appear blue during daylight hours.

This phenomenon is most pronounced when the Sun is at a high angle in the sky (around noon). When the sun is low on the horizon, like during sunrise or sunset, its light has to pass through more atmosphere before reaching us. During these times, red and orange wavelebands dominate because they scat