<a href="https://colab.research.google.com/github/frychicken/Groupme_stat/blob/main/GraphicalData.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import csv
import time
import sys
from datetime import datetime
import pandas as pd
import requests
import json
from ast import literal_eval
import matplotlib.pyplot as plt


In [None]:
# Connection settings shared by the cells below. The token and group id are
# left blank here; presumably they must be filled in before running anything.
access_token = ''  # sent as the `token` query parameter on API requests
group_id = ''  # interpolated into the /groups/{group_id} request URLs
base_url = 'https://api.groupme.com/v3'  # URL prefix used by fetch_messages

In [None]:
def fetch_messages(group_id, access_token):
    """Download a group's message history, paging backwards in time.

    Requests ``{base_url}/groups/{group_id}/messages`` in batches of 100,
    passing the id of the last message of each batch as ``before_id`` for
    the next request. Paging stops when the API reports that nothing older
    exists (HTTP 304 or an empty batch) or when a non-retryable error occurs.

    Args:
        group_id: Identifier of the group whose messages are fetched.
        access_token: API token sent as the ``token`` query parameter.

    Returns:
        A list of message dicts in the order the API returned them. On an
        error the messages collected so far are returned; the error itself
        is only printed.
    """
    messages = []
    url = f"{base_url}/groups/{group_id}/messages"
    params = {'token': access_token, 'limit': 100}
    while True:
        try:
            # A timeout keeps a stalled connection from hanging the notebook.
            response = requests.get(url, params=params, timeout=30)
            if response.status_code == 304:
                # GroupMe answers 304 with an empty body once before_id has
                # paged past the oldest message: the normal end of history,
                # not an error (raise_for_status() does not raise for it and
                # .json() would fail on the empty body).
                break
            response.raise_for_status()  # Check for HTTP errors
            batch = response.json()['response']['messages']
        except requests.exceptions.HTTPError as http_err:
            # Use the response attached to the exception rather than relying
            # on the loop variable still being bound.
            failed = http_err.response
            if failed is not None and failed.status_code == 429:
                # Handle common rate limiting error
                print("Rate limit exceeded. Waiting 60 seconds before retrying...")
                time.sleep(60)
                continue
            print(f"HTTP error occurred: {http_err}")
            break
        except requests.exceptions.RequestException as req_err:
            print(f"Request error occurred: {req_err}")
            break
        except Exception as e:
            print(f"An error occurred: {e}")
            break

        if not batch:
            break

        messages.extend(batch)
        # The next page starts just before the oldest message seen so far.
        params['before_id'] = batch[-1]['id']
        print(f"Total messages fetched: {len(messages)}")
    return messages

def save_messages_to_csv(messages, filename):
    """Write message dicts to *filename* as a six-column CSV.

    Each message contributes one row: author name, creation date formatted
    as mm/dd/yyyy (local time of ``created_at``), text, number of likes,
    author id, and the liker ids joined with commas.

    Any exception raised while writing is caught and printed; nothing is
    re-raised and the function always returns ``None``.
    """
    header = ['Name', 'Date', 'Message', 'Like Count', 'User ID', 'Liked By']
    try:
        with open(filename, mode='w', newline='', encoding='utf-8') as out:
            sheet = csv.writer(out)
            sheet.writerow(header)

            for message in messages:
                # Fields are read in column order, one row per message.
                sheet.writerow([
                    message['name'],
                    datetime.fromtimestamp(message['created_at']).strftime('%m/%d/%Y'),
                    message['text'],
                    len(message['favorited_by']),
                    message['user_id'],
                    ','.join(message['favorited_by']),
                ])
    except Exception as e:
        print(f"Failed to save messages: {e}")

# Run the export: download the full history, then write it to a CSV file.
# NOTE(review): fetch_messages and save_messages_to_csv both catch their own
# exceptions and only print them, so the success line below can still be
# printed after a failed download or a failed write.
try:
    messages = fetch_messages(group_id, access_token)
    save_messages_to_csv(messages, 'groupme_chat_history.csv')
    print("Messages have been successfully saved to 'groupme_chat_history.csv'.")
except Exception as e:
    # Only reached for errors that escape the two helpers above.
    print(f"An error occurred while fetching or saving messages: {e}")
    sys.exit(1)



In [None]:
# Roster cache: group id -> {member user_id (as str): nickname}. Filled by the
# first successful lookup so that charts resolving dozens of ids issue one
# request instead of one per id. Re-running this cell clears it.
_nickname_cache = {}


def get_nickname(user_id):
    """Return the current group nickname for *user_id*, or ``'Unknown'``.

    The member list of the group identified by the global ``group_id`` is
    fetched once and reused for later calls. A non-200 response is not
    cached, so a later call retries the request.

    Args:
        user_id: Member id in any form; it is compared as ``str(user_id)``.

    Returns:
        The member's ``nickname`` value, or ``'Unknown'`` when the id is not
        in the member list or the list could not be fetched.
    """
    roster = _nickname_cache.get(group_id)
    if roster is None:
        response = requests.get(
            f"https://api.groupme.com/v3/groups/{group_id}",
            params={'token': access_token},
            timeout=30,
        )
        if response.status_code != 200:
            # Do not remember failures (e.g. rate limiting); try again later.
            return 'Unknown'
        members = response.json().get('response', {}).get('members', [])
        roster = {}
        for member in members:
            # setdefault keeps the first entry for an id, matching a linear scan.
            roster.setdefault(str(member.get('user_id')), member.get('nickname'))
        _nickname_cache[group_id] = roster
    return roster.get(str(user_id), 'Unknown')


In [None]:
# Load the exported history. The id and text columns are pinned to str:
# with default type inference pandas may parse them as numbers (or as a mix
# of numbers and strings on large files), which breaks the string handling
# of 'Liked By' and can split one user across two 'User ID' groups.
df = pd.read_csv(
    'groupme_chat_history.csv',
    dtype={'Name': str, 'Message': str, 'User ID': str, 'Liked By': str},
)

In [None]:
# Function to add value labels on top of each bar
def add_value_labels(ax, spacing=5, decimals=2):
    """Annotate every bar of *ax* with its height, centred above the bar.

    Args:
        ax: Axes-like object exposing ``patches`` and ``annotate``.
        spacing: Vertical gap between bar top and label, in points.
        decimals: Digits kept after the decimal point for non-integer
            heights. Whole-number heights are printed without a fraction.
    """
    for rect in ax.patches:
        y_value = rect.get_height()
        x_value = rect.get_x() + rect.get_width() / 2
        if float(y_value).is_integer():
            label = str(int(y_value))
        else:
            # Averages would otherwise be printed with full float precision
            # (e.g. 2.3333333333333335) and overlap neighbouring bars.
            label = f"{y_value:.{decimals}f}"
        ax.annotate(label, (x_value, y_value), xytext=(0, spacing), textcoords="offset points", ha='center', va='bottom')


In [None]:
# Total likes received per author, highest first.
df['Like Count'] = df['Like Count'].astype(int)
likes_per_user = df.groupby('User ID')['Like Count'].sum()
sorted_likes = likes_per_user.sort_values(ascending=False)

# Resolve the 30 most-liked authors to nicknames, keeping totals aligned.
top_liked_ids = sorted_likes.head(30).index
nicknames = [get_nickname(author_id) for author_id in top_liked_ids]
like_counts = [sorted_likes[author_id] for author_id in top_liked_ids]

# Filtering out "Unknown" for the second graph
known_pairs = [
    (name, total) for name, total in zip(nicknames, like_counts) if name != "Unknown"
]
filtered_nicknames = [name for name, _ in known_pairs]
filtered_like_counts = [total for _, total in known_pairs]

# First chart: every author. Second chart: resolved nicknames only.
chart_specs = (
    ('Top 30 Users by Like Count Including "Unknown"', nicknames, like_counts, 'skyblue'),
    ('Top 30 Users by Like Count Excluding "Unknown"', filtered_nicknames, filtered_like_counts, 'tomato'),
)
for chart_title, names, totals, bar_color in chart_specs:
    plt.figure(figsize=(15, 7))
    plt.bar(names, totals, color=bar_color)
    plt.title(chart_title)
    plt.xticks(rotation=45, ha="right")
    plt.xlabel('User Nickname')
    plt.ylabel('Like Count')
    add_value_labels(plt.gca())  # Add value labels to each bar
    plt.tight_layout()
    plt.show()

In [None]:
# Messages sent per author, sorted descending.
sorted_message_counts = (
    df.groupby('User ID')['User ID'].count().sort_values(ascending=False)
)

# Prepare data for plotting: nicknames and counts of the 30 busiest authors.
top_sender_ids = sorted_message_counts.head(30).index
nicknames = [get_nickname(sender_id) for sender_id in top_sender_ids]
message_counts = [sorted_message_counts[sender_id] for sender_id in top_sender_ids]

# Same data restricted to authors whose nickname could be resolved.
known_pairs = [
    (name, sent) for name, sent in zip(nicknames, message_counts) if name != "Unknown"
]
filtered_nicknames = [name for name, _ in known_pairs]
filtered_counts = [sent for _, sent in known_pairs]

# First Graph: Including "Unknown"; Second Graph: Excluding "Unknown"
chart_specs = (
    ('Top 30 Users by Message Count Including "Unknown"', nicknames, message_counts, 'skyblue'),
    ('Top 30 Users by Message Count Excluding "Unknown"', filtered_nicknames, filtered_counts, 'tomato'),
)
for chart_title, names, totals, bar_color in chart_specs:
    plt.figure(figsize=(15, 7))
    bars = plt.bar(names, totals, color=bar_color)
    plt.title(chart_title)
    plt.xticks(rotation=45, ha="right")
    plt.xlabel('User Nickname')
    plt.ylabel('Messages Sent')
    add_value_labels(plt.gca())
    plt.tight_layout()
    plt.show()

In [None]:
def safe_parse(x):
    """Parse one 'Liked By' cell into a list of user ids.

    Accepts the comma-separated text stored in the CSV (``"12,34"`` or
    ``"12"``), a value that is already a list/tuple/set (returned as a
    list), or a bare number. Anything that cannot be interpreted as ids
    yields an empty list instead of raising.

    Args:
        x: Cell value to parse.

    Returns:
        A list of ids; ids parsed from text are ints.
    """
    if isinstance(x, (list, tuple, set)):
        # Already parsed (e.g. the cell was run twice); keep the ids.
        return list(x)
    try:
        result = literal_eval(x if isinstance(x, str) else str(x))
    except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
        # The exceptions literal_eval documents for malformed input.
        return []
    if isinstance(result, (list, tuple, set)):
        return list(result)
    if isinstance(result, float) and result.is_integer():
        # A lone id that pandas inferred as a float, e.g. 123.0.
        return [int(result)]
    if isinstance(result, int):
        return [result]
    return []
# Turn each 'Liked By' cell into a list of liker ids; blank or missing cells
# become an empty list.
df['Liked By'] = df['Liked By'].apply(
    lambda cell: [] if pd.isnull(cell) or cell.strip() == '' else safe_parse(cell)
)

# Count how many likes each user handed out across all messages.
like_tally = {}
for liked_by in df['Liked By']:
    for user_id in liked_by:
        like_tally[user_id] = like_tally.get(user_id, 0) + 1

likes_given_df = pd.DataFrame(like_tally.items(), columns=['User ID', 'Likes Given'])

sorted_likes_given = likes_given_df.sort_values(by='Likes Given', ascending=False).reset_index(drop=True)

# Prepare data for plotting
top_likers = sorted_likes_given.head(30)
nicknames = [get_nickname(user_id) for user_id in top_likers['User ID']]
likes_given = list(top_likers['Likes Given'])


In [None]:
# Positions of the likers whose nickname could be resolved; used to build
# the "excluding" series from the nicknames/likes_given lists prepared above.
known_positions = [pos for pos, name in enumerate(nicknames) if name != "Unknown"]
filtered_nicknames = [nicknames[pos] for pos in known_positions]
filtered_likes_given = [likes_given[pos] for pos in known_positions]

# First graph includes "Unknown"; Second Graph: Excluding "Unknown"
chart_specs = (
    ('Top 30 Users by Likes Given Including "Unknown"', nicknames, likes_given, 'skyblue'),
    ('Top 30 Users by Likes Given Excluding "Unknown"', filtered_nicknames, filtered_likes_given, 'tomato'),
)
for chart_title, names, totals, bar_color in chart_specs:
    plt.figure(figsize=(15, 7))
    bars = plt.bar(names, totals, color=bar_color)
    plt.title(chart_title)
    plt.xticks(rotation=45, ha="right")
    plt.xlabel('User Nickname')
    plt.ylabel('Likes Given')
    add_value_labels(plt.gca())
    plt.tight_layout()
    plt.show()

In [None]:
df['Like Count'] = df['Like Count'].astype(int)

# Per author: likes received ('sum') and messages sent ('count'), then the
# ratio of the two.
agg_df = df.groupby('User ID').agg({'Like Count': ['sum', 'count']})
agg_df.columns = ['Total Likes Received', 'Total Messages Sent']
agg_df['Average Likes per Message'] = agg_df['Total Likes Received'] / agg_df['Total Messages Sent']
sorted_agg_df = agg_df.sort_values(by='Average Likes per Message', ascending=False).reset_index()

# Take an explicit copy: adding a column to the bare head(10) slice triggers
# pandas' SettingWithCopyWarning and is not guaranteed to modify the slice.
top_10_users = sorted_agg_df.head(10).copy()

# Fetching nicknames and filtering out "Unknown"
top_10_users['Nickname'] = top_10_users['User ID'].apply(get_nickname)
filtered_top_10_users = top_10_users[top_10_users['Nickname'] != 'Unknown']

# Prepare data for plotting
nicknames = filtered_top_10_users['Nickname']
average_likes = filtered_top_10_users['Average Likes per Message']

# Plotting
plt.figure(figsize=(10, 6))
bars = plt.bar(nicknames, average_likes, color='skyblue')
plt.title('Top Users by Average Likes per Message (Excluding "Unknown")')
plt.xlabel('User Nickname')
plt.ylabel('Average Likes per Message')
plt.xticks(rotation=45, ha="right")

add_value_labels(plt.gca())
plt.tight_layout()
plt.show()


In [None]:
def convert_to_list(liked_by_str):
    """Normalise one 'Liked By' cell to a list of user-id strings.

    Handles the raw comma-separated CSV text as well as cells that an
    earlier cell of the notebook has already turned into a list of ids, so
    membership tests against ``str(user_id)`` work in both cases.

    Args:
        liked_by_str: Comma-separated ids, an already parsed
            list/tuple/set of ids, a single number, or a missing value.

    Returns:
        A list of stripped id strings; empty for missing or blank cells.
    """
    if isinstance(liked_by_str, (list, tuple, set)):
        # Checked first: pd.isnull() on a sequence returns an array, which
        # cannot be used in a boolean test.
        return [str(user_id).strip() for user_id in liked_by_str]
    if pd.isnull(liked_by_str):
        return []
    if isinstance(liked_by_str, float) and liked_by_str.is_integer():
        # A lone id that pandas inferred as a float, e.g. 123.0 -> "123".
        liked_by_str = int(liked_by_str)
    text = str(liked_by_str)
    if not text.strip():
        return []
    return [user_id.strip() for user_id in text.split(',')]

df['Liked By'] = df['Liked By'].apply(convert_to_list)

# Count, per author, the messages whose liker list contains the author's id.
self_likes = {}
for user_id, liked_by in zip(df['User ID'], df['Liked By']):
    if str(user_id) not in liked_by:
        continue
    self_likes[user_id] = self_likes.get(user_id, 0) + 1

self_likes_df = pd.DataFrame(list(self_likes.items()), columns=['User ID', 'Self Likes'])
sorted_self_likes = self_likes_df.sort_values(by='Self Likes', ascending=False).reset_index(drop=True)

# Fetching nicknames
sorted_self_likes['Nickname'] = sorted_self_likes['User ID'].apply(get_nickname)

# Prepare data for plotting including "Unknown"
top_self_likers = sorted_self_likes.head(30)
nicknames_incl_unknown = top_self_likers['Nickname']
self_likes_incl_unknown = top_self_likers['Self Likes']

# Prepare data for plotting excluding "Unknown"
has_nickname = sorted_self_likes['Nickname'] != 'Unknown'
filtered_data = sorted_self_likes[has_nickname].head(30)
nicknames_excl_unknown = filtered_data['Nickname']
self_likes_excl_unknown = filtered_data['Self Likes']

# Plotting: first including "Unknown", then excluding it.
chart_specs = (
    ('Top Users by Self Likes Including "Unknown"', nicknames_incl_unknown, self_likes_incl_unknown, 'skyblue'),
    ('Top Users by Self Likes Excluding "Unknown"', nicknames_excl_unknown, self_likes_excl_unknown, 'tomato'),
)
drawn_bars = []
for chart_title, names, totals, bar_color in chart_specs:
    plt.figure(figsize=(10, 6))
    drawn_bars.append(plt.bar(names, totals, color=bar_color))
    plt.title(chart_title)
    plt.xlabel('User Nickname')
    plt.ylabel('Self Likes')
    plt.xticks(rotation=45, ha="right")
    add_value_labels(plt.gca())
    plt.tight_layout()
    plt.show()
bars_incl_unknown, bars_excl_unknown = drawn_bars


In [None]:
# Characters per message. Missing text (NaN after the CSV round trip) counts
# as 0 characters; str(NaN) is "nan", which would otherwise add a bogus
# length of 3 for every empty message and skew the averages.
df['Message Length'] = df['Message'].apply(
    lambda text: 0 if pd.isnull(text) else len(str(text))
)

# Mean message length per author, longest first.
agg_df = df.groupby('User ID')['Message Length'].mean().reset_index()

sorted_agg_df = agg_df.sort_values(by='Message Length', ascending=False)

sorted_agg_df['Nickname'] = sorted_agg_df['User ID'].apply(get_nickname)
top_30_users = sorted_agg_df.head(30)

# Two views of the same top 30: all rows, and rows with a resolved nickname.
include_unknown = top_30_users
exclude_unknown = top_30_users[top_30_users['Nickname'] != 'Unknown']

def plot_avg_message_length(data, title, color):
    """Show a bar chart of average message length per nickname.

    Args:
        data: Table with 'Nickname' and 'Message Length' columns.
        title: Chart title.
        color: Bar colour passed to ``plt.bar``.
    """
    plt.figure(figsize=(12, 8))
    drawn = plt.bar(data['Nickname'], data['Message Length'], color=color)
    plt.title(title)
    plt.xlabel('User Nickname')
    plt.ylabel('Average Message Length')
    plt.xticks(rotation=45, ha="right")

    # Label each bar with its height rounded to two decimals.
    for rect in drawn:
        height = rect.get_height()
        centre_x = rect.get_x() + rect.get_width()/2
        plt.text(centre_x, height, round(height, 2), ha='center', va='bottom')

    plt.tight_layout()
    plt.show()


# Draw the chart twice: all top-30 rows, then only resolved nicknames.
for frame, chart_title, bar_color in (
    (include_unknown, 'Top 30 Users by Average Characters per Message Including "Unknown"', 'skyblue'),
    (exclude_unknown, 'Top 30 Users by Average Characters per Message Excluding "Unknown"', 'lightgreen'),
):
    plot_avg_message_length(frame, chart_title, bar_color)
