## Workout vs Diet Related Text Analysis

In [1]:
import pandas as pd
import string
import nltk
import re
from nltk.tokenize import word_tokenize

In [2]:
workout_related_words = [
    'exercise', 'workout', 'fitness', 'strength', 'endurance', 'cardio', 'muscle', 
    'weightlifting', 'yoga', 'stretching', 'flexibility', 'health', 'wellness', 
    'bodybuilding', 'hiit', 'running', 'cycling', 'aerobics', 'pilates', 'recovery', 
    'training', 'stamina', 'core', 'weight loss', 'fat loss', 'calisthenics', 'strength training', 
    'powerlifting', 'resistance training', 'bodyweight exercises', 'squat', 'push-up', 
    'deadlift', 'plank', 'burpees', 'gym', 'personal trainer', 'fitness goals', 'performance', 
    'muscle gain', 'cardiovascular', 'fitness journey', 'healthy lifestyle', 'sports', 
    'motivation', 'active lifestyle',"fit"
]

diet_related_words = [
    'nutrition', 'healthy eating', 'calories', 'macronutrients', 'micronutrients', 
    'carbohydrates', 'proteins', 'fats', 'fiber', 'vitamins', 'minerals', 'meal plan', 
    'portion control', 'balanced diet', 'low-carb', 'keto', 'paleo', 'vegetarian', 
    'vegan', 'intermittent fasting', 'detox', 'gluten-free', 'organic', 'whole foods', 
    'processed foods', 'sugar-free', 'high-protein', 'low-fat', 'low-sodium', 'Mediterranean diet', 
    'meal prep', 'superfoods', 'antioxidants', 'water intake', 'hydration', 'nutrition labels', 
    'dietary restrictions', 'healthy snacks', 'gmo-free', 'clean eating', 'weight loss', 
    'calorie deficit', 'calorie surplus', 'food pyramid', 'food diary', 'portion sizes', 'nutrient-dense',"diet"
]


In [3]:
csv_file_path = r'C:\Users\Chandru R\Desktop\Document\MYSQL Data (CSV Format DataSet)\Text Message\conversation_text.csv'  
df = pd.read_csv(csv_file_path)

In [4]:
print("Original Data:")
print(df.head())

Original Data:
   chat_row                                       text_message
0         1  Today was the horrible day for my sugar-free. ...
1         2  Today was the horrible day for my gym. I feel ...
2         3  I great my sports session today! It really mad...
3         4  Today was the bad day for my plank. I feel so ...
4         5  Today was the disappointing day for my nutrien...


In [5]:
if 'text_message' not in df.columns:
    print(f"Error: 'message' column not found. Available columns are: {df.columns.tolist()}")
    exit()

In [6]:
nltk.download('punkt_tab')  # Download the 'punkt' tokenizer model (this is used for word tokenization)

[nltk_data] Downloading package punkt_tab to C:\Users\Chandru
[nltk_data]     R\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


True

In [7]:
def clean_text(text):
    text = text.lower()  
    text = re.sub(r"[@/{}[]/#!^$%&|~<>?^a-zA-Z0-9\s]", '', text)
    tokens = word_tokenize(text)
    return tokens
df['cleaned_text'] = df['text_message'].apply(clean_text)

In [8]:
print("\nCleaned Data:")
print(df[['text_message', 'cleaned_text']].head())


Cleaned Data:
                                        text_message  \
0  Today was the horrible day for my sugar-free. ...   
1  Today was the horrible day for my gym. I feel ...   
2  I great my sports session today! It really mad...   
3  Today was the bad day for my plank. I feel so ...   
4  Today was the disappointing day for my nutrien...   

                                        cleaned_text  
0  [today, was, the, horrible, day, for, my, suga...  
1  [today, was, the, horrible, day, for, my, gym,...  
2  [i, great, my, sports, session, today, !, it, ...  
3  [today, was, the, bad, day, for, my, plank, .,...  
4  [today, was, the, disappointing, day, for, my,...  


In [9]:
def count_words(tokens, word_list): # 7.Custom Word Counting
    return sum(1 for word in tokens if word in word_list) 

df['workout_related_words'] = df['cleaned_text'].apply(lambda tokens: count_words(tokens, workout_related_words))
df['diet_related_words'] = df['cleaned_text'].apply(lambda tokens: count_words(tokens, diet_related_words))

In [10]:
output_file_path = "C:/Users/Chandru R/Desktop/Document/Analysed NLP data/WorkOut vs Diet Wise Analysis/most_relevant_conversation.csv" 
if output_file_path:
    df.to_csv(output_file_path, index=False)  
    print(f"\nCleaned data has been saved to {output_file_path}")
else:
    print("No file path provided. Data was not saved.")


Cleaned data has been saved to C:/Users/Chandru R/Desktop/Document/Analysed NLP data/WorkOut vs Diet Wise Analysis/most_relevant_conversation.csv
