In [None]:
pip install transformers torch

from transformers import pipeline
from datetime import datetime
import pinecone

Defaulting to user installation because normal site-packages is not writeable
Collecting transformers
  Using cached transformers-4.44.1-py3-none-any.whl.metadata (43 kB)
Collecting torch
  Using cached torch-2.4.0-cp312-cp312-win_amd64.whl.metadata (27 kB)
Collecting filelock (from transformers)
  Using cached filelock-3.15.4-py3-none-any.whl.metadata (2.9 kB)
Collecting huggingface-hub<1.0,>=0.23.2 (from transformers)
  Using cached huggingface_hub-0.24.6-py3-none-any.whl.metadata (13 kB)
Collecting numpy>=1.17 (from transformers)
  Using cached numpy-2.1.0-cp312-cp312-win_amd64.whl.metadata (59 kB)
Collecting pyyaml>=5.1 (from transformers)
  Using cached PyYAML-6.0.2-cp312-cp312-win_amd64.whl.metadata (2.1 kB)
Collecting regex!=2019.12.17 (from transformers)
  Using cached regex-2024.7.24-cp312-cp312-win_amd64.whl.metadata (41 kB)
Collecting requests (from transformers)
  Using cached requests-2.32.3-py3-none-any.whl.metadata (4.6 kB)
Collecting safetensors>=0.4.1 (from transformer

ERROR: Could not install packages due to an OSError: [WinError 32] The process cannot access the file because it is being used by another process: 'C:\\Users\\shafi\\AppData\\Local\\Packages\\PythonSoftwareFoundation.Python.3.12_qbz5n2kfra8p0\\LocalCache\\local-packages\\Python312\\site-packages\\numpy\\testing\\print_coercion_tables.py'
Check the permissions.



# Initialize sentiment analysis pipeline

In [None]:
# Name the checkpoint explicitly instead of relying on the library's implicit
# default for the task, so results stay reproducible across transformers
# releases (this is the checkpoint transformers selects by default for
# "sentiment-analysis").
sentiment_analyzer = pipeline(
    "sentiment-analysis",
    model="distilbert/distilbert-base-uncased-finetuned-sst-2-english",
)

def analyze_sentiment(review_text):
    """Classify a review and return ``(polarity, confidence)``.

    ``polarity`` is ``1`` when the first prediction's label is 'POSITIVE' and
    ``-1`` for any other label; ``confidence`` is that prediction's 'score'
    field, passed through unchanged.
    """
    top_prediction = sentiment_analyzer(review_text)[0]

    # Map the textual label onto a signed numeric polarity.
    if top_prediction['label'] == 'POSITIVE':
        polarity = 1
    else:
        polarity = -1

    return polarity, top_prediction['score']

# Inserting Data into Pinecone with Sentiment Scores


In [None]:
def insert_review_with_sentiment(rating_data):
    """Score one review's sentiment and upsert it into the "rag" Pinecone index.

    Args:
        rating_data: Mapping that contains at least the keys 'professor' and
            'review'. Every key/value pair is copied into the record metadata.

    Raises:
        RuntimeError: If the PINECONE_API_KEY environment variable is not set.
    """
    import os  # local import so this cell does not depend on another cell's imports

    # Never keep the API key in the notebook source; read it from the environment.
    api_key = os.environ.get("PINECONE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "Set the PINECONE_API_KEY environment variable before inserting reviews."
        )

    # Use the client-object API, matching the trend-tracking cell of this
    # notebook (`Pinecone(api_key=...)`), instead of the legacy module-level
    # `pinecone.init(...)`.
    index = pinecone.Pinecone(api_key=api_key).Index("rag")

    # Analyze sentiment
    sentiment_score, sentiment_confidence = analyze_sentiment(rating_data['review'])

    # Read the clock once so the ID suffix and the stored timestamp describe
    # the same instant.
    now = datetime.now()
    review_id = f"{rating_data['professor'].replace(' ', '_').lower()}_{now.timestamp()}"

    # Construct the record to upsert into Pinecone
    data = {
        "id": review_id,
        # Vector values must be a list of floats, not a bare scalar.
        # NOTE(review): the list length has to equal the dimension of the
        # "rag" index -- confirm that index really is 1-dimensional.
        "values": [float(sentiment_score)],
        "metadata": {
            **rating_data,
            "sentiment_score": sentiment_score,
            "sentiment_confidence": sentiment_confidence,
            "timestamp": now.isoformat(),
        },
    }

    # Upsert into Pinecone
    index.upsert(vectors=[data])
    print(f"Inserted review with ID {review_id}")



# Local File Read

In [None]:
import json

# NOTE(review): this cell used to start with
# `insert_review_with_sentiment(rating_data)`, but `rating_data` is never
# defined in the notebook, so the cell raised NameError on a fresh kernel.
# The stray call was dropped; the loop below inserts every review.

# Load reviews from JSON file
# NOTE(review): the Colab cell further down reads 'reviews.json' -- confirm
# which filename is the right one.
# The encoding is explicit so non-ASCII names decode the same on every platform.
with open('review.json', 'r', encoding='utf-8') as file:
    review_data = json.load(file)

# Process each review
for review in review_data['reviews']:
    insert_review_with_sentiment(review)

print("All reviews have been processed and inserted into Pinecone.")

# Colab File Upload

In [None]:
import json
import os

from google.colab import files

# Ask the user to pick the file(s) to upload into the Colab runtime
uploaded = files.upload()

# Show what was uploaded
uploaded

# Show what is present in the working directory
os.listdir()

# Parse the uploaded JSON document
with open('reviews.json', 'r') as reviews_file:
    data = json.load(reviews_file)

# Dump the parsed content for inspection
print(data)




# Trend Tracking Over Time

In [None]:
import os
import numpy as np
from datetime import datetime
from pinecone import Pinecone, ServerlessSpec

# Initialize Pinecone. The key is read from the PINECONE_API_KEY environment
# variable so that no secret has to be typed into (or committed with) the
# notebook; an unset variable yields the same empty key as before.
pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY", ""))

def get_sentiment_trend(professor_name, period="weekly", top_k=3):
    """Return the mean stored sentiment score per time bucket for a professor.

    Args:
        professor_name: Value matched against the 'professor' metadata field.
        period: "daily" buckets by calendar date (``datetime.date`` keys); any
            other value buckets by year and week number ('%Y-%W' string keys).
        top_k: Maximum number of matches requested from Pinecone. Only the
            returned matches contribute to the trend, so raise this when a
            professor has more reviews than the default.

    Returns:
        dict mapping each bucket key to the mean of the sentiment scores that
        fall into it, in chronological order of first occurrence. Note this is
        a plain per-bucket mean, not a rolling/moving average.
    """
    # Access the Pinecone index
    index = pc.Index("rag")

    # Query for the professor's reviews.
    # NOTE(review): no `vector` or `id` is passed here; the Pinecone query API
    # documents one of them as required -- confirm this call succeeds against
    # the real index.
    response = index.query(
        filter={"professor": professor_name},
        top_k=top_k,
        include_metadata=True
    )

    # Pair each score with its parsed timestamp, oldest first
    sentiments = sorted(
        (
            (item['metadata']['sentiment_score'], datetime.fromisoformat(item['metadata']['timestamp']))
            for item in response['matches']
        ),
        key=lambda pair: pair[1],
    )

    # Group the scores by period bucket
    trend = {}
    for score, timestamp in sentiments:
        key = timestamp.date() if period == "daily" else timestamp.strftime('%Y-%W')  # Weekly
        trend.setdefault(key, []).append(score)

    # Average the scores inside each bucket
    return {key: np.mean(scores) for key, scores in trend.items()}

# Demo: fetch and show the weekly sentiment trend for one professor
trend = get_sentiment_trend("Marie Curie", period="weekly")
print(f"Sentiment Trend: {trend}")


# Visualization & Reporting

In [2]:
import numpy as np
from datetime import datetime, timedelta
import matplotlib.pyplot as plt

# Simulated data: one hard-coded entry per professor, each with a 'stars'
# value. The trend function defined below reads 'stars' as its score;
# simulate_timestamps() adds a 'timestamp' key to these dicts in place.
reviews = [
    {"professor": "Marie Curie", "stars": 4.5},
    {"professor": "Albert Einstein", "stars": 4.9},
    {"professor": "Isaac Newton", "stars": 4.7},
    {"professor": "Ada Lovelace", "stars": 4.8},
    {"professor": "Charles Darwin", "stars": 4.4},
    {"professor": "Rosalind Franklin", "stars": 4.6},
    {"professor": "Nikola Tesla", "stars": 4.7},
    {"professor": "Alan Turing", "stars": 4.9},
    {"professor": "Galileo Galilei", "stars": 4.5},
    {"professor": "Leonardo da Vinci", "stars": 4.8},
    {"professor": "Gregor Mendel", "stars": 4.4},
    {"professor": "Florence Nightingale", "stars": 4.7},
    {"professor": "Alexander Fleming", "stars": 4.5},
    {"professor": "James Clerk Maxwell", "stars": 4.6},
    {"professor": "Rachel Carson", "stars": 4.8},
    {"professor": "Richard Feynman", "stars": 4.9},
    {"professor": "Carl Linnaeus", "stars": 4.3},
    {"professor": "Dorothy Hodgkin", "stars": 4.6},
    {"professor": "Erwin Schrödinger", "stars": 4.8},
    {"professor": "Niels Bohr", "stars": 4.7}
]

def simulate_timestamps(reviews):
    """Stamp each review with a synthetic ISO-8601 'timestamp', one week apart.

    The first review is dated ``len(reviews)`` weeks before the current time
    and every following review one week later. The dicts are modified in
    place and the same list object is returned.
    """
    one_week = timedelta(weeks=1)
    # Read the clock once so consecutive stamps are exactly one week apart.
    start = datetime.now() - timedelta(weeks=len(reviews))
    for offset, entry in enumerate(reviews):
        entry['timestamp'] = (start + offset * one_week).isoformat()
    return reviews

def get_sentiment_trend(professor_name, period="weekly"):
    """Return the mean 'stars' value per time bucket for one professor.

    Works on the module-level ``reviews`` list after stamping it with
    simulated timestamps. With ``period == "daily"`` the bucket keys are
    ``datetime.date`` objects; any other value yields '%Y-%W' strings.
    """
    # Attach synthetic timestamps to the simulated reviews
    stamped = simulate_timestamps(reviews)

    # Keep this professor's (stars, timestamp) pairs, oldest first
    scored = sorted(
        (
            (r['stars'], datetime.fromisoformat(r['timestamp']))
            for r in stamped
            if r['professor'] == professor_name
        ),
        key=lambda pair: pair[1],
    )

    # Collect the scores under their period bucket
    buckets = {}
    for stars, when in scored:
        if period == "daily":
            bucket = when.date()
        else:
            bucket = when.strftime('%Y-%W')
        buckets.setdefault(bucket, []).append(stars)

    # Average every bucket
    return {bucket: np.mean(values) for bucket, values in buckets.items()}

def plot_sentiment_trend(professor_name):
    """Plot the weekly sentiment trend of one professor as a line chart.

    Args:
        professor_name: Name passed to ``get_sentiment_trend`` and shown in
            the chart title exactly as given.
    """
    trend = get_sentiment_trend(professor_name, period="weekly")

    # Bucket keys go on the x axis, mean scores on the y axis
    dates = list(trend.keys())
    scores = list(trend.values())

    plt.figure(figsize=(10, 5))
    plt.plot(dates, scores, marker='o')
    # Use the name as supplied: str.capitalize() lower-cases everything after
    # the first character and would render "Marie Curie" as "Marie curie".
    plt.title(f"Sentiment Trend for {professor_name}")
    plt.xlabel("Time (Weekly)")
    plt.ylabel("Sentiment Score")
    plt.xticks(rotation=45)
    plt.grid(True)
    plt.show()

# Example usage: draw the weekly trend chart for a single professor
plot_sentiment_trend("Marie Curie")


^C
Note: you may need to restart the kernel to use updated packages.


Defaulting to user installation because normal site-packages is not writeable
Collecting matplotlib
  Downloading matplotlib-3.9.2-cp312-cp312-win_amd64.whl.metadata (11 kB)
Collecting contourpy>=1.0.1 (from matplotlib)
  Downloading contourpy-1.2.1-cp312-cp312-win_amd64.whl.metadata (5.8 kB)
Collecting cycler>=0.10 (from matplotlib)
  Downloading cycler-0.12.1-py3-none-any.whl.metadata (3.8 kB)
Collecting fonttools>=4.22.0 (from matplotlib)
  Downloading fonttools-4.53.1-cp312-cp312-win_amd64.whl.metadata (165 kB)
Collecting kiwisolver>=1.3.1 (from matplotlib)
  Downloading kiwisolver-1.4.5-cp312-cp312-win_amd64.whl.metadata (6.5 kB)
Collecting numpy>=1.23 (from matplotlib)
  Using cached numpy-2.1.0-cp312-cp312-win_amd64.whl.metadata (59 kB)
Collecting pillow>=8 (from matplotlib)
  Downloading pillow-10.4.0-cp312-cp312-win_amd64.whl.metadata (9.3 kB)
Collecting pyparsing>=2.3.1 (from matplotlib)
  Downloading pyparsing-3.1.2-py3-none-any.whl.metadata (5.1 kB)
Downloading matplotlib-3