# PRE-PROCESS

In [40]:
import pandas as pd
import sqlite3
import re


In [41]:
csv_path = "V6 Updated_In-Depth_Analysis_of_Mental_Health_Dataset.csv"
df = pd.read_csv(csv_path)

df.head()
df.dtypes


gender                      object
age                          int64
current_fitness_lvl         object
exercise_timeline_weekly     int64
workout_companion           object
primary_goal                object
biggest_mental_shift        object
scale_feelings               int64
health_improvement          object
impact_on_daylife           object
diet_sleep_impact           object
fitness_tracker             object
sleeptime                   object
follow_dietroutine          object
hydration_attention         object
favorite_workout            object
post_impacts                object
music_impact                object
recovery_methods            object
additional_impacts          object
family_reaction             object
biggest_challenge           object
dtype: object

# Create a SQLite database from the CSV

In [42]:
# Create (or connect to) a SQLite database file
conn = sqlite3.connect("mental_health.db")

# Write the DataFrame into a SQL table
table_name = "mental_health"
df.to_sql(table_name, conn, if_exists="replace", index=False)

# Quick check: how many rows?
cur = conn.cursor()
cur.execute(f"SELECT COUNT(*) FROM {table_name}")
print("Rows in table:", cur.fetchone()[0])


Rows in table: 1930


# Utility function to run SQL and get a DataFrame

In [43]:
def run_sql(query: str):
    """
    Run a SQL query on the mental_health database and return a pandas DataFrame.
    """
    print("Executing SQL:\n", query)
    return pd.read_sql_query(query, conn)


In [44]:
test_query = f"""
SELECT primary_goal, AVG(age) AS avg_age, COUNT(*) AS n
FROM {table_name}
GROUP BY primary_goal
ORDER BY n DESC;
"""

run_sql(test_query)


Executing SQL:
 
SELECT primary_goal, AVG(age) AS avg_age, COUNT(*) AS n
FROM mental_health
GROUP BY primary_goal
ORDER BY n DESC;



Unnamed: 0,primary_goal,avg_age,n
0,Weight loss,25.996599,294
1,Muscle gain,26.46831,284
2,Mental stress relief,25.406699,209
3,Physical health,25.445545,202
4,"Physical health, Mental stress relief",27.935252,139
5,"Mental stress relief, Muscle gain",28.481818,110
6,"Physical health, Muscle gain",27.161905,105
7,"Physical health, Mental stress relief, Muscle ...",23.819048,105
8,"Physical health, Mental stress relief, Weight ...",24.772727,88
9,"Weight loss, Muscle gain",28.205128,78


# Describe the schema for the LLM

In [45]:
SCHEMA_DESCRIPTION = f"""
You are working with a SQLite database that has ONE table: {table_name}.

Table {table_name} has the following columns:

- gender (TEXT)
- age (INTEGER)
- current_fitness_lvl (TEXT)
- exercise_timeline_weekly (INTEGER) -- number of workout sessions/week
- workout_companion (TEXT)
- primary_goal (TEXT) -- e.g., Weight loss, Muscle gain, Mental health, etc.
- biggest_mental_shift (TEXT)
- scale_feelings (INTEGER) -- numeric rating scale
- health_improvement (TEXT)
- impact_on_daylife (TEXT)
- diet_sleep_impact (TEXT)
- fitness_tracker (TEXT)
- sleeptime (TEXT)
- follow_dietroutine (TEXT)
- hydration_attention (TEXT)
- favorite_workout (TEXT)
- post_impacts (TEXT)
- music_impact (TEXT)
- recovery_methods (TEXT)
- additional_impacts (TEXT)
- family_reaction (TEXT)
- biggest_challenge (TEXT)
"""
print(SCHEMA_DESCRIPTION)



You are working with a SQLite database that has ONE table: mental_health.

Table mental_health has the following columns:

- gender (TEXT)
- age (INTEGER)
- current_fitness_lvl (TEXT)
- exercise_timeline_weekly (INTEGER) -- number of workout sessions/week
- workout_companion (TEXT)
- primary_goal (TEXT) -- e.g., Weight loss, Muscle gain, Mental health, etc.
- biggest_mental_shift (TEXT)
- scale_feelings (INTEGER) -- numeric rating scale
- health_improvement (TEXT)
- impact_on_daylife (TEXT)
- diet_sleep_impact (TEXT)
- fitness_tracker (TEXT)
- sleeptime (TEXT)
- follow_dietroutine (TEXT)
- hydration_attention (TEXT)
- favorite_workout (TEXT)
- post_impacts (TEXT)
- music_impact (TEXT)
- recovery_methods (TEXT)
- additional_impacts (TEXT)
- family_reaction (TEXT)
- biggest_challenge (TEXT)



# Design the Text2SQL prompt

In [46]:
TEXT2SQL_SYSTEM_PROMPT = """
You are an expert data analyst who writes SQL for a SQLite database.

RULES:
- Write ONLY a SQL SELECT query, no explanation.
- Never use DELETE, INSERT, UPDATE, DROP, or CREATE.
- Use the table name exactly as given.
- Use valid SQLite syntax.
"""

TEXT2SQL_USER_TEMPLATE = """
Here is the database schema:
{schema}

User question:
{question}

Write a single SQL SELECT query that answers the question.
Return ONLY the SQL, no backticks, no explanation.
"""


# LLM call (stub + example)

In [47]:
# Stub (model-agnostic)

In [52]:
def call_llm(prompt: str) -> str:
    """
    Stub for calling an LLM. 
    Replace the body with the API or local model you want to use.
    It must return a plain text string with the model's response.
    """
    raise NotImplementedError("Implement this with your chosen LLM API or local model.")



In [63]:
import os

# Tell Hugging Face to cache everything on E:
os.environ["HF_HOME"] = r"E:\hf_models"
os.environ["TRANSFORMERS_CACHE"] = r"E:\hf_models\transformers"
os.environ["HF_DATASETS_CACHE"] = r"E:\hf_models\datasets"  # just in case you use datasets later


In [64]:
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch

# Choose a small instruct/chat model
MODEL_NAME = "microsoft/Phi-3-mini-4k-instruct"
CACHE_DIR = r"E:\hf_models\transformers"   # same folder as above

# Load tokenizer and model (they will be downloaded to E: now)
tokenizer = AutoTokenizer.from_pretrained(
    MODEL_NAME,
    cache_dir=CACHE_DIR
)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    cache_dir=CACHE_DIR,
    device_map="auto",          # GPU if available, else CPU
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32
)

# Build a text-generation pipeline
text_gen = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=256,          # enough for SQL
    do_sample=False,             # deterministic (good for SQL)
)


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Downloading shards: 100%|██████████| 2/2 [17:46<00:00, 533.44s/it]
Loading checkpoint shards: 100%|██████████| 2/2 [00:07<00:00,  3.69s/it]
Device set to use cuda:0


# Connect the model to your Text2SQL pipeline

In [66]:
def call_llm(prompt: str) -> str:
    """
    Use the local Phi-3 model (via Hugging Face) to generate SQL.
    """

    # Combine system + user prompt
    full_prompt = TEXT2SQL_SYSTEM_PROMPT.strip() + "\n\n" + prompt.strip()

    # Generate completion
    outputs = text_gen(
        full_prompt,
        num_return_sequences=1
    )

    # Pipeline returns: [ { "generated_text": "<prompt> + <completion>" } ]
    full_text = outputs[0]["generated_text"]

    # Keep only the part AFTER the prompt
    completion = full_text[len(full_prompt):].strip()

    return completion


In [None]:
#HELPER

In [69]:
import re

def extract_sql_from_response(raw_response: str) -> str:
    """
    Given the raw LLM response, extract the first SQL SELECT statement.
    Handles cases where the model adds headers, explanations, or extra text.
    """
    text = raw_response.strip()

    # Remove common code-fence markers if present
    text = text.replace("```sql", "").replace("```", "").strip()

    # Find the first occurrence of 'select' (case-insensitive)
    match = re.search(r"\bselect\b", text, re.IGNORECASE)
    if not match:
        raise ValueError(f"No SELECT statement found in LLM response:\n{text}")

    # Take everything from 'select' onward
    sql = text[match.start():]

    # Optionally cut at the first semicolon (end of statement)
    semicolon_idx = sql.find(";")
    if semicolon_idx != -1:
        sql = sql[:semicolon_idx + 1]

    return sql.strip()


In [70]:
def question_to_sql(question: str) -> str:
    """
    Given a natural language question, ask the LLM to generate a SQL query.
    """
    user_prompt = TEXT2SQL_USER_TEMPLATE.format(
        schema=SCHEMA_DESCRIPTION,
        question=question
    )

    full_prompt = user_prompt  # system prompt is added inside call_llm

    raw_response = call_llm(full_prompt)

    # Use the robust extractor instead of naive .startswith("select")
    sql = extract_sql_from_response(raw_response)

    # Final safety check
    if not sql.lower().startswith("select"):
        raise ValueError(f"Extracted SQL does not start with SELECT:\n{sql}")

    return sql


In [71]:
q1 = "How many total respondents are there in the dataset?"
df_ans1 = ask_text2sql(q1)
df_ans1



Natural language question:
 How many total respondents are there in the dataset?

Generated SQL:
 SELECT COUNT(*) FROM mental_health;
Executing SQL:
 SELECT COUNT(*) FROM mental_health;


Unnamed: 0,COUNT(*)
0,1930


In [72]:
questions = [
    "How many total respondents are there in the dataset?",
    "What is the average age of all respondents?",
    "How many respondents have their primary goal as weight loss?",
    "For each primary_goal, show the number of respondents.",
    "For each primary_goal, show the average age and the number of respondents.",
    "For each current_fitness_lvl, what is the average exercise_timeline_weekly?",
    "How many respondents use a fitness tracker?",
    "How many respondents use a fitness tracker and also follow a diet routine?",
    "For each gender, show the average scale_feelings score and the number of respondents.",
    "For each favorite_workout, show the number of respondents, ordered by count descending.",
    "Among respondents who exercise at least 4 times per week, what is the average scale_feelings score?",
    "Which primary_goal has the highest average scale_feelings score?",
    "For each hydration_attention category, what is the average scale_feelings score?",
    "For each current_fitness_lvl, show the average age and average exercise_timeline_weekly, ordered by average exercise_timeline_weekly descending.",
    "How many respondents report that their diet_sleep_impact is 'Very Positive'?"
]

for i, q in enumerate(questions, start=1):
    print("\n" + "="*60)
    print(f"Q{i}: {q}")
    try:
        df_ans = ask_text2sql(q)
        display(df_ans.head(20))
    except Exception as e:
        print("Error:", e)



Q1: How many total respondents are there in the dataset?

Natural language question:
 How many total respondents are there in the dataset?

Generated SQL:
 SELECT COUNT(*) FROM mental_health;
Executing SQL:
 SELECT COUNT(*) FROM mental_health;


Unnamed: 0,COUNT(*)
0,1930



Q2: What is the average age of all respondents?

Natural language question:
 What is the average age of all respondents?

Generated SQL:
 SELECT AVG(age) FROM mental_health;
Executing SQL:
 SELECT AVG(age) FROM mental_health;


Unnamed: 0,AVG(age)
0,26.419689



Q3: How many respondents have their primary goal as weight loss?

Natural language question:
 How many respondents have their primary goal as weight loss?

Generated SQL:
 SELECT COUNT(*) FROM mental_health WHERE primary_goal = 'Weight loss';
Executing SQL:
 SELECT COUNT(*) FROM mental_health WHERE primary_goal = 'Weight loss';


Unnamed: 0,COUNT(*)
0,294



Q4: For each primary_goal, show the number of respondents.

Natural language question:
 For each primary_goal, show the number of respondents.

Generated SQL:
 SELECT primary_goal, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY primary_goal;
Executing SQL:
 SELECT primary_goal, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY primary_goal;


Unnamed: 0,primary_goal,respondent_count
0,Mental stress relief,209
1,"Mental stress relief, Muscle gain",110
2,"Mental stress relief, Others",3
3,"Mental stress relief, Weight loss",69
4,"Mental stress relief, Weight loss, Muscle gain",15
5,Muscle gain,284
6,"Muscle gain, Others",8
7,"Muscle gain,Weight loss",6
8,Others,12
9,Physical health,202



Q5: For each primary_goal, show the average age and the number of respondents.

Natural language question:
 For each primary_goal, show the average age and the number of respondents.

Generated SQL:
 SELECT primary_goal, AVG(age) AS average_age, COUNT(*) AS number_of_respondents
FROM mental_health
GROUP BY primary_goal;
Executing SQL:
 SELECT primary_goal, AVG(age) AS average_age, COUNT(*) AS number_of_respondents
FROM mental_health
GROUP BY primary_goal;


Unnamed: 0,primary_goal,average_age,number_of_respondents
0,Mental stress relief,25.406699,209
1,"Mental stress relief, Muscle gain",28.481818,110
2,"Mental stress relief, Others",25.666667,3
3,"Mental stress relief, Weight loss",25.101449,69
4,"Mental stress relief, Weight loss, Muscle gain",23.866667,15
5,Muscle gain,26.46831,284
6,"Muscle gain, Others",33.375,8
7,"Muscle gain,Weight loss",23.666667,6
8,Others,24.083333,12
9,Physical health,25.445545,202



Q6: For each current_fitness_lvl, what is the average exercise_timeline_weekly?

Natural language question:
 For each current_fitness_lvl, what is the average exercise_timeline_weekly?

Generated SQL:
 SELECT current_fitness_lvl, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
GROUP BY current_fitness_lvl;
Executing SQL:
 SELECT current_fitness_lvl, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
GROUP BY current_fitness_lvl;


Unnamed: 0,current_fitness_lvl,average_exercise_timeline_weekly
0,Advanced,5.132353
1,Beginner,2.697108
2,Expert,5.83237
3,Intermediate,3.820048



Q7: How many respondents use a fitness tracker?

Natural language question:
 How many respondents use a fitness tracker?

Generated SQL:
 SELECT COUNT(*) FROM mental_health WHERE fitness_tracker IS NOT NULL;
Executing SQL:
 SELECT COUNT(*) FROM mental_health WHERE fitness_tracker IS NOT NULL;


Unnamed: 0,COUNT(*)
0,1930


You seem to be using the pipelines sequentially on GPU. In order to maximize efficiency please use a dataset



Q8: How many respondents use a fitness tracker and also follow a diet routine?

Natural language question:
 How many respondents use a fitness tracker and also follow a diet routine?

Generated SQL:
 SELECT COUNT(*) FROM mental_health WHERE fitness_tracker IS NOT NULL AND follow_dietroutine IS NOT NULL;
Executing SQL:
 SELECT COUNT(*) FROM mental_health WHERE fitness_tracker IS NOT NULL AND follow_dietroutine IS NOT NULL;


Unnamed: 0,COUNT(*)
0,1930



Q9: For each gender, show the average scale_feelings score and the number of respondents.

Natural language question:
 For each gender, show the average scale_feelings score and the number of respondents.

Generated SQL:
 SELECT gender, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondents
FROM mental_health
GROUP BY gender;
Executing SQL:
 SELECT gender, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondents
FROM mental_health
GROUP BY gender;


Unnamed: 0,gender,average_scale_feelings,respondents
0,Female,6.310867,727
1,Male,6.691604,1203



Q10: For each favorite_workout, show the number of respondents, ordered by count descending.

Natural language question:
 For each favorite_workout, show the number of respondents, ordered by count descending.

Generated SQL:
 SELECT favorite_workout, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY favorite_workout
ORDER BY respondent_count DESC;
Executing SQL:
 SELECT favorite_workout, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY favorite_workout
ORDER BY respondent_count DESC;


Unnamed: 0,favorite_workout,respondent_count
0,Strength training,437
1,Cardio,254
2,"Strength training, High-Intensity Interval Tra...",200
3,"Cardio, Strength training",194
4,High-Intensity Interval Training (HIIT),151
5,Yoga,137
6,"Cardio, Yoga",113
7,"Cardio, High-Intensity Interval Training (HIIT)",76
8,"Cardio, Strength training, High-Intensity Inte...",71
9,"High-Intensity Interval Training (HIIT), Yoga",46



Q11: Among respondents who exercise at least 4 times per week, what is the average scale_feelings score?

Natural language question:
 Among respondents who exercise at least 4 times per week, what is the average scale_feelings score?

Generated SQL:
 SELECT AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE exercise_timeline_weekly >= 4;
Executing SQL:
 SELECT AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE exercise_timeline_weekly >= 4;


Unnamed: 0,average_scale_feelings
0,7.271352



Q12: Which primary_goal has the highest average scale_feelings score?

Natural language question:
 Which primary_goal has the highest average scale_feelings score?

Generated SQL:
 SELECT primary_goal, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY primary_goal
ORDER BY average_scale_feelings DESC
LIMIT 1;
Executing SQL:
 SELECT primary_goal, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY primary_goal
ORDER BY average_scale_feelings DESC
LIMIT 1;


Unnamed: 0,primary_goal,average_scale_feelings
0,"Physical health, Mental stress relief, Weight ...",8.284091



Q13: For each hydration_attention category, what is the average scale_feelings score?

Natural language question:
 For each hydration_attention category, what is the average scale_feelings score?

Generated SQL:
 SELECT hydration_attention, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY hydration_attention;
Executing SQL:
 SELECT hydration_attention, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY hydration_attention;


Unnamed: 0,hydration_attention,average_scale_feelings
0,None at all,5.612245
1,Quite a lot,6.757542
2,Very little,5.539007
3,Very much,7.817694



Q14: For each current_fitness_lvl, show the average age and average exercise_timeline_weekly, ordered by average exercise_timeline_weekly descending.

Natural language question:
 For each current_fitness_lvl, show the average age and average exercise_timeline_weekly, ordered by average exercise_timeline_weekly descending.

Generated SQL:
 SELECT current_fitness_lvl, AVG(age) AS average_age, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
GROUP BY current_fitness_lvl
ORDER BY average_exercise_timeline_weekly DESC;
Executing SQL:
 SELECT current_fitness_lvl, AVG(age) AS average_age, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
GROUP BY current_fitness_lvl
ORDER BY average_exercise_timeline_weekly DESC;


Unnamed: 0,current_fitness_lvl,average_age,average_exercise_timeline_weekly
0,Expert,30.635838,5.83237
1,Advanced,29.069853,5.132353
2,Intermediate,25.841787,3.820048
3,Beginner,24.940639,2.697108



Q15: How many respondents report that their diet_sleep_impact is 'Very Positive'?

Natural language question:
 How many respondents report that their diet_sleep_impact is 'Very Positive'?

Generated SQL:
 SELECT COUNT(*) FROM mental_health WHERE diet_sleep_impact = 'Very Positive';
Executing SQL:
 SELECT COUNT(*) FROM mental_health WHERE diet_sleep_impact = 'Very Positive';


Unnamed: 0,COUNT(*)
0,0


In [73]:
mixed_questions = [
    # 1. Descriptive + counts
    "List all distinct primary_goal values and show how many respondents fall into each goal, ordered from most common to least common.",
    "For each current_fitness_lvl, show the number of respondents and the average age.",
    "For each workout_companion category, show how many respondents are in that category and their average exercise_timeline_weekly.",
    "Show the top 10 most frequent biggest_challenge values and how many respondents reported each one.",
    "For each hydration_attention category, show the number of respondents and their average scale_feelings score.",

    # 2. Behavior + mental state (mixed)
    "For each primary_goal, show the average scale_feelings score and the number of respondents.",
    "Among respondents who use a fitness tracker, what is the average exercise_timeline_weekly and average scale_feelings score?",
    "For each favorite_workout, show the average scale_feelings score and the number of respondents, ordered by average scale_feelings descending.",
    "For each diet_sleep_impact category, show the number of respondents and their average scale_feelings score.",
    "Among respondents whose impact_on_daylife is very positive, what is their average exercise_timeline_weekly and scale_feelings score?",

    # 3. Comparisons between groups
    "Compare the average age and average exercise_timeline_weekly for respondents whose primary_goal is weight loss versus those whose primary_goal is muscle gain.",
    "Compare the average scale_feelings score for respondents who follow a diet routine versus those who do not follow a diet routine.",
    "Compare the average exercise_timeline_weekly for respondents who work out alone versus those who work out with friends.",
    "For each gender, show the average scale_feelings score and average exercise_timeline_weekly, ordered by average scale_feelings descending.",
    "Among respondents who use a fitness tracker, compare the average scale_feelings score for different primary_goal categories.",

    # 4. Filtered subset + grouped summary
    "Among respondents whose primary_goal is mental health, show the number of respondents for each current_fitness_lvl and the average scale_feelings score within each level.",
    "Among respondents who exercise at least 4 times per week, show the number of respondents for each primary_goal and the average scale_feelings score.",
    "For respondents who report that their family_reaction is very supportive, show the number of respondents for each primary_goal and the average scale_feelings score.",
    "Among respondents who follow a diet routine, show the number of respondents for each favorite_workout and the average scale_feelings score.",
    "Among respondents who do not use a fitness tracker, show the number of respondents for each current_fitness_lvl and the average exercise_timeline_weekly.",

    # 5. “Show me examples” / textual focus
    "Show 10 example rows with columns: age, primary_goal, workout_companion, favorite_workout, scale_feelings.",
    "Show 10 example biggest_challenge responses along with age and primary_goal.",
    "Show 10 example rows where the primary_goal is mental health, including columns: age, current_fitness_lvl, exercise_timeline_weekly, biggest_mental_shift.",
    "Show 10 example rows where the family_reaction is negative, including columns: age, primary_goal, impact_on_daylife, biggest_challenge.",
    "Show 10 example rows where hydration_attention is high, including columns: age, primary_goal, favorite_workout, scale_feelings.",

    # 6. Ranking / “best/worst” questions
    "Which primary_goal has the highest average exercise_timeline_weekly, and what is that average along with the number of respondents?",
    "Which favorite_workout has the highest average scale_feelings score, and how many respondents chose it?",
    "Which current_fitness_lvl has the highest average scale_feelings score, and how many respondents are in that level?",
    "Which hydration_attention category has the highest average scale_feelings score, and how many respondents are in that category?",
    "Which biggest_challenge appears most frequently in the dataset, and how many respondents reported it?"
]


In [74]:
for i, q in enumerate(mixed_questions, start=1):
    print("\n" + "="*80)
    print(f"Q{i}: {q}\n")
    try:
        df_ans = ask_text2sql(q)
        display(df_ans.head(20))   # show first 20 rows if big
    except Exception as e:
        print("Error:", e)



Q1: List all distinct primary_goal values and show how many respondents fall into each goal, ordered from most common to least common.


Natural language question:
 List all distinct primary_goal values and show how many respondents fall into each goal, ordered from most common to least common.

Generated SQL:
 SELECT primary_goal, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY primary_goal
ORDER BY respondent_count DESC;
Executing SQL:
 SELECT primary_goal, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY primary_goal
ORDER BY respondent_count DESC;


Unnamed: 0,primary_goal,respondent_count
0,Weight loss,294
1,Muscle gain,284
2,Mental stress relief,209
3,Physical health,202
4,"Physical health, Mental stress relief",139
5,"Mental stress relief, Muscle gain",110
6,"Physical health, Muscle gain",105
7,"Physical health, Mental stress relief, Muscle ...",105
8,"Physical health, Mental stress relief, Weight ...",88
9,"Weight loss, Muscle gain",78



Q2: For each current_fitness_lvl, show the number of respondents and the average age.


Natural language question:
 For each current_fitness_lvl, show the number of respondents and the average age.

Generated SQL:
 SELECT current_fitness_lvl, COUNT(*) AS number_of_respondents, AVG(age) AS average_age
FROM mental_health
GROUP BY current_fitness_lvl;
Executing SQL:
 SELECT current_fitness_lvl, COUNT(*) AS number_of_respondents, AVG(age) AS average_age
FROM mental_health
GROUP BY current_fitness_lvl;


Unnamed: 0,current_fitness_lvl,number_of_respondents,average_age
0,Advanced,272,29.069853
1,Beginner,657,24.940639
2,Expert,173,30.635838
3,Intermediate,828,25.841787



Q3: For each workout_companion category, show how many respondents are in that category and their average exercise_timeline_weekly.


Natural language question:
 For each workout_companion category, show how many respondents are in that category and their average exercise_timeline_weekly.

Generated SQL:
 SELECT workout_companion, COUNT(*) AS respondent_count, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
GROUP BY workout_companion;
Executing SQL:
 SELECT workout_companion, COUNT(*) AS respondent_count, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
GROUP BY workout_companion;


Unnamed: 0,workout_companion,respondent_count,average_exercise_timeline_weekly
0,Alone - focused,653,3.592649
1,"Alone - focused, Alone - lonely",19,4.263158
2,"Alone - focused, Alone - lonely, Alone - unmot...",2,1.0
3,"Alone - focused, Alone - lonely, Together - di...",3,3.333333
4,"Alone - focused, Alone - lonely, Together - fo...",5,4.2
5,"Alone - focused, Alone - lonely, Together - fo...",1,4.0
6,"Alone - focused, Alone - unmotivated",7,3.428571
7,"Alone - focused, Alone - unmotivated, Together...",4,4.5
8,"Alone - focused, Together - distracted",64,4.15625
9,"Alone - focused, Together - focused",214,4.570093



Q4: Show the top 10 most frequent biggest_challenge values and how many respondents reported each one.


Natural language question:
 Show the top 10 most frequent biggest_challenge values and how many respondents reported each one.

Generated SQL:
 SELECT biggest_challenge, COUNT(*) AS frequency
FROM mental_health
GROUP BY biggest_challenge
ORDER BY frequency DESC
LIMIT 10;
Executing SQL:
 SELECT biggest_challenge, COUNT(*) AS frequency
FROM mental_health
GROUP BY biggest_challenge
ORDER BY frequency DESC
LIMIT 10;


Unnamed: 0,biggest_challenge,frequency
0,Lack of time,427
1,Motivation issues,197
2,"Financial cost, Lack of time",160
3,"Lack of time, Motivation issues",142
4,Financial cost,116
5,"Lack of time, Lack of guidance or knowledge",84
6,Lack of guidance or knowledge,68
7,None of the above,64
8,"Lack of guidance or knowledge, Social or famil...",64
9,Physical limitations,62



Q5: For each hydration_attention category, show the number of respondents and their average scale_feelings score.


Natural language question:
 For each hydration_attention category, show the number of respondents and their average scale_feelings score.

Generated SQL:
 SELECT hydration_attention, COUNT(*) AS respondent_count, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY hydration_attention;
Executing SQL:
 SELECT hydration_attention, COUNT(*) AS respondent_count, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY hydration_attention;


Unnamed: 0,hydration_attention,respondent_count,average_scale_feelings
0,None at all,98,5.612245
1,Quite a lot,895,6.757542
2,Very little,564,5.539007
3,Very much,373,7.817694



Q6: For each primary_goal, show the average scale_feelings score and the number of respondents.


Natural language question:
 For each primary_goal, show the average scale_feelings score and the number of respondents.

Generated SQL:
 SELECT primary_goal, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY primary_goal;
Executing SQL:
 SELECT primary_goal, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY primary_goal;


Unnamed: 0,primary_goal,average_scale_feelings,respondent_count
0,Mental stress relief,5.822967,209
1,"Mental stress relief, Muscle gain",8.018182,110
2,"Mental stress relief, Others",7.333333,3
3,"Mental stress relief, Weight loss",5.449275,69
4,"Mental stress relief, Weight loss, Muscle gain",6.733333,15
5,Muscle gain,6.482394,284
6,"Muscle gain, Others",6.375,8
7,"Muscle gain,Weight loss",6.666667,6
8,Others,5.5,12
9,Physical health,6.435644,202



Q7: Among respondents who use a fitness tracker, what is the average exercise_timeline_weekly and average scale_feelings score?


Natural language question:
 Among respondents who use a fitness tracker, what is the average exercise_timeline_weekly and average scale_feelings score?

Generated SQL:
 SELECT AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE fitness_tracker IS NOT NULL AND fitness_tracker <> '';
Executing SQL:
 SELECT AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE fitness_tracker IS NOT NULL AND fitness_tracker <> '';


Unnamed: 0,average_exercise_timeline_weekly,average_scale_feelings
0,3.803109,6.548187



Q8: For each favorite_workout, show the average scale_feelings score and the number of respondents, ordered by average scale_feelings descending.


Natural language question:
 For each favorite_workout, show the average scale_feelings score and the number of respondents, ordered by average scale_feelings descending.

Generated SQL:
 SELECT favorite_workout, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS number_of_respondents
FROM mental_health
GROUP BY favorite_workout
ORDER BY average_scale_feelings DESC;
Executing SQL:
 SELECT favorite_workout, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS number_of_respondents
FROM mental_health
GROUP BY favorite_workout
ORDER BY average_scale_feelings DESC;


Unnamed: 0,favorite_workout,average_scale_feelings,number_of_respondents
0,"Cardio, Strength training, High-Intensity Inte...",10.0,1
1,"Strength training, High-Intensity Interval Tra...",9.6,5
2,"Cardio, Strength training, High-Intensity Inte...",9.5,2
3,"Cardio, High-Intensity Interval Training (HIIT...",9.5,4
4,"High-Intensity Interval Training (HIIT), Yoga,...",9.0,2
5,"Cardio, Strength training, High-Intensity Inte...",8.8,5
6,"Cardio, Strength training, Yoga",7.96,25
7,"Cardio, Strength training, High-Intensity Inte...",7.704225,71
8,"Strength training, High-Intensity Interval Tra...",7.7,20
9,"High-Intensity Interval Training (HIIT), Yoga",7.695652,46



Q9: For each diet_sleep_impact category, show the number of respondents and their average scale_feelings score.


Natural language question:
 For each diet_sleep_impact category, show the number of respondents and their average scale_feelings score.

Generated SQL:
 SELECT diet_sleep_impact, COUNT(*) AS respondent_count, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY diet_sleep_impact;
Executing SQL:
 SELECT diet_sleep_impact, COUNT(*) AS respondent_count, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY diet_sleep_impact;


Unnamed: 0,diet_sleep_impact,respondent_count,average_scale_feelings
0,No,618,5.804207
1,Yes,1312,6.898628



Q10: Among respondents whose impact_on_daylife is very positive, what is their average exercise_timeline_weekly and scale_feelings score?


Natural language question:
 Among respondents whose impact_on_daylife is very positive, what is their average exercise_timeline_weekly and scale_feelings score?

Generated SQL:
 SELECT AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE impact_on_daylife ='very positive';
Executing SQL:
 SELECT AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE impact_on_daylife ='very positive';


Unnamed: 0,average_exercise_timeline_weekly,average_scale_feelings
0,,



Q11: Compare the average age and average exercise_timeline_weekly for respondents whose primary_goal is weight loss versus those whose primary_goal is muscle gain.


Natural language question:
 Compare the average age and average exercise_timeline_weekly for respondents whose primary_goal is weight loss versus those whose primary_goal is muscle gain.

Generated SQL:
 SELECT primary_goal, AVG(age) AS average_age, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
WHERE primary_goal IN ('Weight loss', 'Muscle gain')
GROUP BY primary_goal;
Executing SQL:
 SELECT primary_goal, AVG(age) AS average_age, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
WHERE primary_goal IN ('Weight loss', 'Muscle gain')
GROUP BY primary_goal;


Unnamed: 0,primary_goal,average_age,average_exercise_timeline_weekly
0,Muscle gain,26.46831,4.130282
1,Weight loss,25.996599,3.360544



Q12: Compare the average scale_feelings score for respondents who follow a diet routine versus those who do not follow a diet routine.


Natural language question:
 Compare the average scale_feelings score for respondents who follow a diet routine versus those who do not follow a diet routine.

Generated SQL:
 SELECT follow_dietroutine, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY follow_dietroutine;
Executing SQL:
 SELECT follow_dietroutine, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
GROUP BY follow_dietroutine;


Unnamed: 0,follow_dietroutine,average_scale_feelings
0,Always,7.878173
1,Never,5.356688
2,Often,6.802198
3,Rarely,6.210084
4,Sometimes,6.502326



Q13: Compare the average exercise_timeline_weekly for respondents who work out alone versus those who work out with friends.


Natural language question:
 Compare the average exercise_timeline_weekly for respondents who work out alone versus those who work out with friends.

Generated SQL:
 SELECT
  workout_companion,
  AVG(exercise_timeline_weekly) AS average_sessions_per_week
FROM
  mental_health
GROUP BY
  workout_companion;
Executing SQL:
 SELECT
  workout_companion,
  AVG(exercise_timeline_weekly) AS average_sessions_per_week
FROM
  mental_health
GROUP BY
  workout_companion;


Unnamed: 0,workout_companion,average_sessions_per_week
0,Alone - focused,3.592649
1,"Alone - focused, Alone - lonely",4.263158
2,"Alone - focused, Alone - lonely, Alone - unmot...",1.0
3,"Alone - focused, Alone - lonely, Together - di...",3.333333
4,"Alone - focused, Alone - lonely, Together - fo...",4.2
5,"Alone - focused, Alone - lonely, Together - fo...",4.0
6,"Alone - focused, Alone - unmotivated",3.428571
7,"Alone - focused, Alone - unmotivated, Together...",4.5
8,"Alone - focused, Together - distracted",4.15625
9,"Alone - focused, Together - focused",4.570093



Q14: For each gender, show the average scale_feelings score and average exercise_timeline_weekly, ordered by average scale_feelings descending.


Natural language question:
 For each gender, show the average scale_feelings score and average exercise_timeline_weekly, ordered by average scale_feelings descending.

Generated SQL:
 SELECT gender, AVG(scale_feelings) AS avg_scale_feelings, AVG(exercise_timeline_weekly) AS avg_exercise_timeline_weekly
FROM mental_health
GROUP BY gender
ORDER BY avg_scale_feelings DESC;
Executing SQL:
 SELECT gender, AVG(scale_feelings) AS avg_scale_feelings, AVG(exercise_timeline_weekly) AS avg_exercise_timeline_weekly
FROM mental_health
GROUP BY gender
ORDER BY avg_scale_feelings DESC;


Unnamed: 0,gender,avg_scale_feelings,avg_exercise_timeline_weekly
0,Male,6.691604,3.778886
1,Female,6.310867,3.843191



Q15: Among respondents who use a fitness tracker, compare the average scale_feelings score for different primary_goal categories.


Natural language question:
 Among respondents who use a fitness tracker, compare the average scale_feelings score for different primary_goal categories.

Generated SQL:
 SELECT primary_goal, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE fitness_tracker IS NOT NULL
GROUP BY primary_goal;
Executing SQL:
 SELECT primary_goal, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE fitness_tracker IS NOT NULL
GROUP BY primary_goal;


Unnamed: 0,primary_goal,average_scale_feelings
0,Mental stress relief,5.822967
1,"Mental stress relief, Muscle gain",8.018182
2,"Mental stress relief, Others",7.333333
3,"Mental stress relief, Weight loss",5.449275
4,"Mental stress relief, Weight loss, Muscle gain",6.733333
5,Muscle gain,6.482394
6,"Muscle gain, Others",6.375
7,"Muscle gain,Weight loss",6.666667
8,Others,5.5
9,Physical health,6.435644



Q16: Among respondents whose primary_goal is mental health, show the number of respondents for each current_fitness_lvl and the average scale_feelings score within each level.


Natural language question:
 Among respondents whose primary_goal is mental health, show the number of respondents for each current_fitness_lvl and the average scale_feelings score within each level.

Generated SQL:
 SELECT current_fitness_lvl, COUNT(*) AS number_of_respondents, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE primary_goal = 'Mental health'
GROUP BY current_fitness_lvl;
Executing SQL:
 SELECT current_fitness_lvl, COUNT(*) AS number_of_respondents, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE primary_goal = 'Mental health'
GROUP BY current_fitness_lvl;


Unnamed: 0,current_fitness_lvl,number_of_respondents,average_scale_feelings



Q17: Among respondents who exercise at least 4 times per week, show the number of respondents for each primary_goal and the average scale_feelings score.


Natural language question:
 Among respondents who exercise at least 4 times per week, show the number of respondents for each primary_goal and the average scale_feelings score.

Generated SQL:
 SELECT primary_goal, COUNT(*) AS number_of_respondents, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE exercise_timeline_weekly >= 4
GROUP BY primary_goal;
Executing SQL:
 SELECT primary_goal, COUNT(*) AS number_of_respondents, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE exercise_timeline_weekly >= 4
GROUP BY primary_goal;


Unnamed: 0,primary_goal,number_of_respondents,average_scale_feelings
0,Mental stress relief,108,6.351852
1,"Mental stress relief, Muscle gain",99,8.242424
2,"Mental stress relief, Others",3,7.333333
3,"Mental stress relief, Weight loss",37,6.081081
4,"Mental stress relief, Weight loss, Muscle gain",7,8.142857
5,Muscle gain,192,7.28125
6,"Muscle gain, Others",7,6.857143
7,"Muscle gain,Weight loss",6,6.666667
8,Others,5,7.4
9,Physical health,82,7.414634



Q18: For respondents who report that their family_reaction is very supportive, show the number of respondents for each primary_goal and the average scale_feelings score.


Natural language question:
 For respondents who report that their family_reaction is very supportive, show the number of respondents for each primary_goal and the average scale_feelings score.

Generated SQL:
 SELECT primary_goal, COUNT(*) AS respondent_count, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE family_reaction ='very supportive'
GROUP BY primary_goal;
Executing SQL:
 SELECT primary_goal, COUNT(*) AS respondent_count, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE family_reaction ='very supportive'
GROUP BY primary_goal;


Unnamed: 0,primary_goal,respondent_count,average_scale_feelings



Q19: Among respondents who follow a diet routine, show the number of respondents for each favorite_workout and the average scale_feelings score.


Natural language question:
 Among respondents who follow a diet routine, show the number of respondents for each favorite_workout and the average scale_feelings score.

Generated SQL:
 SELECT favorite_workout, COUNT(*) AS number_of_respondents, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE follow_dietroutine = 'Yes'
GROUP BY favorite_workout;
Executing SQL:
 SELECT favorite_workout, COUNT(*) AS number_of_respondents, AVG(scale_feelings) AS average_scale_feelings
FROM mental_health
WHERE follow_dietroutine = 'Yes'
GROUP BY favorite_workout;


Unnamed: 0,favorite_workout,number_of_respondents,average_scale_feelings



Q20: Among respondents who do not use a fitness tracker, show the number of respondents for each current_fitness_lvl and the average exercise_timeline_weekly.


Natural language question:
 Among respondents who do not use a fitness tracker, show the number of respondents for each current_fitness_lvl and the average exercise_timeline_weekly.

Generated SQL:
 SELECT current_fitness_lvl, COUNT(*) AS number_of_respondents, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
WHERE fitness_tracker IS NULL OR fitness_tracker = '';
Executing SQL:
 SELECT current_fitness_lvl, COUNT(*) AS number_of_respondents, AVG(exercise_timeline_weekly) AS average_exercise_timeline_weekly
FROM mental_health
WHERE fitness_tracker IS NULL OR fitness_tracker = '';


Unnamed: 0,current_fitness_lvl,number_of_respondents,average_exercise_timeline_weekly
0,,0,



Q21: Show 10 example rows with columns: age, primary_goal, workout_companion, favorite_workout, scale_feelings.


Natural language question:
 Show 10 example rows with columns: age, primary_goal, workout_companion, favorite_workout, scale_feelings.

Generated SQL:
 SELECT age, primary_goal, workout_companion, favorite_workout, scale_feelings
FROM mental_health
LIMIT 10;
Executing SQL:
 SELECT age, primary_goal, workout_companion, favorite_workout, scale_feelings
FROM mental_health
LIMIT 10;


Unnamed: 0,age,primary_goal,workout_companion,favorite_workout,scale_feelings
0,25,Mental stress relief,Alone - focused,Yoga,4
1,22,Weight loss,Together - focused,Strength training,7
2,32,"Mental stress relief, Weight loss",Alone - unmotivated,"Cardio, Strength training",5
3,38,"Weight loss, Muscle gain",Together - focused,"Cardio, Strength training",7
4,38,"Weight loss, Muscle gain",Together - focused,"Cardio, Strength training",8
5,23,"Physical health, Muscle gain",Alone - focused,Strength training,8
6,24,Mental stress relief,Alone - lonely,Cardio,5
7,24,Physical health,Alone - focused,"Cardio, Strength training",6
8,25,Physical health,Together - focused,Cardio,7
9,27,Mental stress relief,Alone - focused,Cardio,4



Q22: Show 10 example biggest_challenge responses along with age and primary_goal.


Natural language question:
 Show 10 example biggest_challenge responses along with age and primary_goal.

Generated SQL:
 SELECT age, primary_goal, biggest_challenge
FROM mental_health
LIMIT 10;
Executing SQL:
 SELECT age, primary_goal, biggest_challenge
FROM mental_health
LIMIT 10;


Unnamed: 0,age,primary_goal,biggest_challenge
0,25,Mental stress relief,"Lack of time, Motivation issues"
1,22,Weight loss,"Lack of time, Motivation issues, Lack of guida..."
2,32,"Mental stress relief, Weight loss","Lack of time, Motivation issues"
3,38,"Weight loss, Muscle gain",Lack of time
4,38,"Weight loss, Muscle gain",Lack of time
5,23,"Physical health, Muscle gain","Financial cost, Lack of time, Lack of guidance..."
6,24,Mental stress relief,Motivation issues
7,24,Physical health,"Lack of time, Motivation issues"
8,25,Physical health,Motivation issues
9,27,Mental stress relief,"Motivation issues, Physical limitations"



Q23: Show 10 example rows where the primary_goal is mental health, including columns: age, current_fitness_lvl, exercise_timeline_weekly, biggest_mental_shift.


Natural language question:
 Show 10 example rows where the primary_goal is mental health, including columns: age, current_fitness_lvl, exercise_timeline_weekly, biggest_mental_shift.

Generated SQL:
 SELECT age, current_fitness_lvl, exercise_timeline_weekly, biggest_mental_shift
FROM mental_health
WHERE primary_goal = 'Mental health';
Executing SQL:
 SELECT age, current_fitness_lvl, exercise_timeline_weekly, biggest_mental_shift
FROM mental_health
WHERE primary_goal = 'Mental health';


Unnamed: 0,age,current_fitness_lvl,exercise_timeline_weekly,biggest_mental_shift



Q24: Show 10 example rows where the family_reaction is negative, including columns: age, primary_goal, impact_on_daylife, biggest_challenge.


Natural language question:
 Show 10 example rows where the family_reaction is negative, including columns: age, primary_goal, impact_on_daylife, biggest_challenge.

Generated SQL:
 SELECT age, primary_goal, impact_on_daylife, biggest_challenge
FROM mental_health
WHERE family_reaction = 'negative'
LIMIT 10;
Executing SQL:
 SELECT age, primary_goal, impact_on_daylife, biggest_challenge
FROM mental_health
WHERE family_reaction = 'negative'
LIMIT 10;


Unnamed: 0,age,primary_goal,impact_on_daylife,biggest_challenge



Q25: Show 10 example rows where hydration_attention is high, including columns: age, primary_goal, favorite_workout, scale_feelings.


Natural language question:
 Show 10 example rows where hydration_attention is high, including columns: age, primary_goal, favorite_workout, scale_feelings.

Generated SQL:
 SELECT age, primary_goal, favorite_workout, scale_feelings
FROM mental_health
WHERE hydration_attention = 'high'
LIMIT 10;
Executing SQL:
 SELECT age, primary_goal, favorite_workout, scale_feelings
FROM mental_health
WHERE hydration_attention = 'high'
LIMIT 10;


Unnamed: 0,age,primary_goal,favorite_workout,scale_feelings



Q26: Which primary_goal has the highest average exercise_timeline_weekly, and what is that average along with the number of respondents?


Natural language question:
 Which primary_goal has the highest average exercise_timeline_weekly, and what is that average along with the number of respondents?

Generated SQL:
 SELECT primary_goal, AVG(exercise_timeline_weekly) AS average_exercise_timeline, COUNT(*) AS respondents_count
FROM mental_health
GROUP BY primary_goal
ORDER BY average_exercise_timeline DESC
LIMIT 1;
Executing SQL:
 SELECT primary_goal, AVG(exercise_timeline_weekly) AS average_exercise_timeline, COUNT(*) AS respondents_count
FROM mental_health
GROUP BY primary_goal
ORDER BY average_exercise_timeline DESC
LIMIT 1;


Unnamed: 0,primary_goal,average_exercise_timeline,respondents_count
0,"Muscle gain,Weight loss",5.666667,6



Q27: Which favorite_workout has the highest average scale_feelings score, and how many respondents chose it?


Natural language question:
 Which favorite_workout has the highest average scale_feelings score, and how many respondents chose it?

Generated SQL:
 SELECT favorite_workout, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY favorite_workout
ORDER BY average_scale_feelings DESC
LIMIT 1;
Executing SQL:
 SELECT favorite_workout, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY favorite_workout
ORDER BY average_scale_feelings DESC
LIMIT 1;


Unnamed: 0,favorite_workout,average_scale_feelings,respondent_count
0,"Cardio, Strength training, High-Intensity Inte...",10.0,1



Q28: Which current_fitness_lvl has the highest average scale_feelings score, and how many respondents are in that level?


Natural language question:
 Which current_fitness_lvl has the highest average scale_feelings score, and how many respondents are in that level?

Generated SQL:
 SELECT current_fitness_lvl, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY current_fitness_lvl
ORDER BY average_scale_feelings DESC
LIMIT 1;
Executing SQL:
 SELECT current_fitness_lvl, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY current_fitness_lvl
ORDER BY average_scale_feelings DESC
LIMIT 1;


Unnamed: 0,current_fitness_lvl,average_scale_feelings,respondent_count
0,Expert,8.462428,173



Q29: Which hydration_attention category has the highest average scale_feelings score, and how many respondents are in that category?


Natural language question:
 Which hydration_attention category has the highest average scale_feelings score, and how many respondents are in that category?

Generated SQL:
 SELECT hydration_attention, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY hydration_attention
ORDER BY average_scale_feelings DESC
LIMIT 1;
Executing SQL:
 SELECT hydration_attention, AVG(scale_feelings) AS average_scale_feelings, COUNT(*) AS respondent_count
FROM mental_health
GROUP BY hydration_attention
ORDER BY average_scale_feelings DESC
LIMIT 1;


Unnamed: 0,hydration_attention,average_scale_feelings,respondent_count
0,Very much,7.817694,373



Q30: Which biggest_challenge appears most frequently in the dataset, and how many respondents reported it?


Natural language question:
 Which biggest_challenge appears most frequently in the dataset, and how many respondents reported it?

Generated SQL:
 SELECT biggest_challenge, COUNT(*) AS frequency
FROM mental_health
GROUP BY biggest_challenge
ORDER BY frequency DESC
LIMIT 1;
Executing SQL:
 SELECT biggest_challenge, COUNT(*) AS frequency
FROM mental_health
GROUP BY biggest_challenge
ORDER BY frequency DESC
LIMIT 1;


Unnamed: 0,biggest_challenge,frequency
0,Lack of time,427
