In [11]:
import json
import nltk
from nltk.corpus import stopwords, wordnet
from nltk.tokenize import word_tokenize
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.svm import SVC
import joblib
import numpy as np

# Fetch the NLTK data packages this script relies on (tokenizer models,
# stopword lists and the WordNet corpus).
for _nltk_resource in ('punkt', 'stopwords', 'wordnet'):
    nltk.download(_nltk_resource)

# Restore the persisted classifier and the text vectorizer from disk
svm_classifier = joblib.load('svm_model.joblib')
vectorizer = joblib.load('vectorizer.joblib')

# Function to preprocess a single journal entry
def preprocess_entry(entry):
    """Normalise one journal entry into a space-separated token string.

    The entry is tokenised with NLTK's ``word_tokenize``, every token is
    lower-cased, and a token is kept only if it is not an English stopword
    and consists solely of alphanumeric characters (so punctuation tokens
    are dropped).

    Args:
        entry: Raw text of a single journal entry.

    Returns:
        The surviving lower-cased tokens joined by single spaces; an empty
        string when no token survives.
    """
    raw_tokens = word_tokenize(entry)
    english_stopwords = set(stopwords.words('english'))

    kept = []
    for raw in raw_tokens:
        lowered = raw.lower()
        # Skip stopwords and anything containing a non-alphanumeric character
        if lowered in english_stopwords or not lowered.isalnum():
            continue
        kept.append(lowered)

    return ' '.join(kept)

# Function to assign core value to preprocessed journal entry
def assign_core_value(entry, core_values_keywords):
    """Pick the core value whose keywords best match a preprocessed entry.

    Each whitespace-separated word of ``entry`` is compared against every
    core value's keywords. A direct keyword hit scores one point for that
    core value; otherwise every synonym of the word (as returned by
    ``get_synonyms``) that appears among the keywords scores one point.

    Args:
        entry: Preprocessed journal entry; words are split on whitespace.
        core_values_keywords: Mapping of core value name to a container of
            keywords for that core value.

    Returns:
        The core value with the highest score. On a tie (including the case
        where nothing matched at all) the first core value in the mapping's
        iteration order is returned.

    Raises:
        ValueError: If ``core_values_keywords`` is empty, because there is no
            core value to choose from.
    """
    # Score for each core value, in the mapping's iteration order
    keyword_freq = {core_value: 0 for core_value in core_values_keywords}

    # Synonyms depend only on the word, not on the core value being scored,
    # so look them up at most once per distinct word instead of once per
    # (word, core value) pair. The lookup stays lazy: a word that is a direct
    # keyword of every core value never triggers a synonym lookup.
    synonym_cache = {}

    for word in entry.split():
        for core_value, keywords in core_values_keywords.items():
            if word in keywords:
                keyword_freq[core_value] += 1
                continue

            if word not in synonym_cache:
                synonym_cache[word] = get_synonyms(word)

            # Every synonym found among the keywords counts as one hit
            keyword_freq[core_value] += sum(
                1 for syn in synonym_cache[word] if syn in keywords
            )

    # max() keeps the first maximal key, which gives the tie-breaking rule above
    return max(keyword_freq, key=keyword_freq.get)

# Read the core value -> keywords mapping. The file contains JSON despite its
# .txt extension. JSON text is UTF-8, so decode it explicitly instead of
# relying on the platform's default encoding.
with open('core_values_keywords.txt', 'r', encoding='utf-8') as file:
    core_values_keywords = json.load(file)

# Define a function to get synonyms of a word using WordNet
def get_synonyms(word):
    """Collect the WordNet lemma names associated with ``word``.

    Args:
        word: The word to look up in WordNet.

    Returns:
        A set containing the name of every lemma of every synset that
        WordNet returns for ``word``; an empty set when there are none.
    """
    return {
        lemma.name()
        for synset in wordnet.synsets(word)
        for lemma in synset.lemmas()
    }

# Read journal entries from the text file, one entry per line.
# The file is decoded as UTF-8 explicitly rather than with the platform
# default, and blank lines are skipped so that empty strings are not
# classified and do not skew the percentages below.
with open('journal_entries.txt', 'r', encoding='utf-8') as file:
    journal_entries = [entry for entry in map(str.strip, file) if entry]

# Preprocess new journal entries
preprocessed_new_entries = [preprocess_entry(entry) for entry in journal_entries]

if journal_entries:
    # Vectorize the new text data using the same vectorizer used during training
    X_new = vectorizer.transform(preprocessed_new_entries)

    # Predict core values for new entries
    predicted_core_values = svm_classifier.predict(X_new)
else:
    # Nothing to classify: skip the model calls (they are not expected to
    # accept zero samples) and fall through to an all-zero report.
    X_new = None
    predicted_core_values = np.array([], dtype=str)

# Count occurrences of each predicted core value
unique_core_values, counts = np.unique(predicted_core_values, return_counts=True)
core_value_counts = dict(zip(unique_core_values, counts))

# Calculate percentage for each core value.
# With no entries core_value_counts is empty, so no division by zero occurs.
total_entries = len(journal_entries)
core_value_percentages = {
    core_value: (count / total_entries) * 100
    for core_value, count in core_value_counts.items()
}

# Print each entry next to its predicted core value (numbered from 1)
for i, (entry, core_value) in enumerate(zip(journal_entries, predicted_core_values), 1):
    print(f"New Journal Entry {i}: {entry}")
    print(f"Predicted Core Value: {core_value}")
    print()

# Print the percentage for every core value listed in the keywords file;
# core values that were never predicted are reported as 0.00%.
for core_value in core_values_keywords:
    percentage = core_value_percentages.get(core_value, 0)
    print(f"{core_value}: {percentage:.2f}%")


New Journal Entry 1: Today, I am grateful for the love and support of my family.
Predicted Core Value: Gratitude

New Journal Entry 2: I appreciate the little things in life, like the warmth of the sun and the beauty of nature.
Predicted Core Value: Gratitude

New Journal Entry 3: Expressing gratitude for the kindness of strangers fills my heart with joy.
Predicted Core Value: Gratitude

New Journal Entry 4: Today was a productive day! Started the morning with a brisk walk, which helped clear my mind. At work, I tackled a challenging project and made significant progress. Took a break to have lunch with a friend, which was refreshing. In the evening, I attended a yoga class, followed by some reading before bed.
Predicted Core Value: Aspiration

New Journal Entry 5: Woke up feeling exhausted today, probably due to a restless night's sleep. Despite that, managed to push through and complete my tasks at work. Took some time in the evening to relax with a hot bath and a good book. Looking 

[nltk_data] Downloading package punkt to /Users/kartik/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/kartik/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to /Users/kartik/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
