Part 1: Prompt engineering

In [7]:
!pip install google-generativeai



In [8]:
#we will be exploring google's python wrapper around their ai API
import os
import google.generativeai as genai

#generate your own key on https://aistudio.google.com/apikey
#never paste the key into the notebook: it is read from the GEMINI_API_KEY environment
#variable, falling back to a hidden prompt so the secret is not saved with the file
#NOTE: any key that was ever committed inside this notebook should be revoked and regenerated
gemini_api_key = os.environ.get("GEMINI_API_KEY")
if not gemini_api_key:
    from getpass import getpass
    gemini_api_key = getpass("Gemini API key: ")

genai.configure(api_key=gemini_api_key)
multimodal_model = genai.GenerativeModel("gemini-1.5-flash-002")


In [9]:
#recall that text prompting and image prompting are both supported
#a text-only request just needs the prompt string
greeting_prompt = "hi, how was your day?"
model_response = multimodal_model.generate_content(greeting_prompt)
model_response.text

"As a large language model, I don't experience days in the same way humans do. I don't have feelings or personal experiences.  However, I've been busy processing information and responding to a variety of queries, which is what I'm designed for.  Thanks for asking! How was your day?\n"

In [10]:
from PIL import Image

#a multimodal request is a list mixing the text question and the opened image
actor_count_prompt = "how many male and female actors are in this image? return as list of two numbers: "
img = Image.open("harrypotter.webp")
model_response = multimodal_model.generate_content([actor_count_prompt, img])
model_response.text

'[2, 1]'

In [11]:
#using your BERT sentiment analysis code from project 3, repeat the process with the uiuc dataset
import pandas as pd
from transformers import pipeline # type: ignore

#download bert model
#the checkpoint name is kept in a variable so it is easy to spot and swap
sentiment_model_name = "finiteautomata/bertweet-base-sentiment-analysis"
pipe = pipeline(task="text-classification", model=sentiment_model_name)

KeyboardInterrupt: 

In [None]:
#load the uiuc dataset; later cells read its 'text' and 'label' columns
uiuc = pd.read_csv('uiuc.csv')

def sentiment(text):
    """Label one post with the BERT sentiment pipeline `pipe`.

    Args:
        text: post body. Anything that is not a non-empty string
            (None, NaN, '', numbers, ...) is skipped.

    Returns:
        The 'label' of the pipeline's first prediction, or None when the
        input is skipped or the pipeline call fails.
    """
    import warnings

    # skip missing values explicitly instead of relying on a swallowed TypeError
    if not isinstance(text, str) or not text:
        return None
    try:
        # only the first 128 characters are sent to the model
        return pipe(text[:128])[0]['label']
    except Exception as exc:
        # stay best-effort (one bad row must not abort the whole .apply), but
        # surface the failure: a silent None hides problems such as `pipe`
        # never having been created
        warnings.warn(f"sentiment() failed with {type(exc).__name__}: {exc}", RuntimeWarning)
        return None

In [None]:
#run sentiment() on every row's text and store the result in the 'sentiment' column
uiuc.loc[:, 'sentiment'] = uiuc['text'].apply(sentiment)

In [None]:
#using the Gemini API, write a prompt to generate sentiment analysis on the same dataset

#make sure to include in the prompt a limit to the type of results (positive, negative, neutral)

def gemini_sentiment(text):
    """Classify one post as POSITIVE, NEGATIVE, or NEUTRAL with the Gemini model.

    Args:
        text: post body to classify. Non-string or blank values are skipped
            without calling the API.

    Returns:
        'POSITIVE', 'NEGATIVE' or 'NEUTRAL'; None when the input is skipped,
        the request fails, or the reply contains none of the three labels.
    """
    import warnings

    if not isinstance(text, str) or not text.strip():
        return None
    try:
        # GenerativeModel exposes generate_content (it has no generate_text);
        # sampling options go in generation_config and the reply is read from .text
        response = multimodal_model.generate_content(
            "Classify the sentiment of this text post as positive, negative, or neutral. "
            f"Answer with exactly one of those three words. Text: {text}",
            generation_config={"temperature": 0.0, "max_output_tokens": 5},
        )
        reply = response.text.strip().upper()
    except Exception as exc:
        # stay best-effort for the row-wise .apply, but do not hide the failure
        warnings.warn(f"gemini_sentiment() failed with {type(exc).__name__}: {exc}", RuntimeWarning)
        return None
    # tolerate punctuation or extra words around the label
    for label in ("POSITIVE", "NEGATIVE", "NEUTRAL"):
        if label in reply:
            return label
    return None

In [None]:
#call gemini_sentiment() once per row and store the result in 'gemini_sentiment'
uiuc['gemini_sentiment'] = uiuc['text'].apply(gemini_sentiment)

In [None]:
#compare the sentiment percentages, what do you notice? Does one method overestimate or underestimate the sentiment of the dataset?

def _canonical_sentiment(raw):
    """Map a model label ('POS', 'negative', 'Neutral', ...) to POSITIVE/NEGATIVE/NEUTRAL, else None."""
    if not isinstance(raw, str):
        return None
    # NOTE(review): the bertweet checkpoint reports short labels (POS/NEG/NEU) per its
    # model card, so both spellings are folded together by their first three letters
    prefix = raw.strip().upper()[:3]
    return {"POS": "POSITIVE", "NEG": "NEGATIVE", "NEU": "NEUTRAL"}.get(prefix)

def _sentiment_percentages(column, sentiment_name):
    """Percentage of rows in each uiuc['label'] group whose normalised `column` equals `sentiment_name`."""
    canonical = uiuc[column].map(_canonical_sentiment)
    # rows without a usable label stay in the denominator and count as "not this sentiment"
    return (canonical == sentiment_name).groupby(uiuc['label']).mean() * 100

negative_percentages_bert = _sentiment_percentages('sentiment', 'NEGATIVE')
positive_percentages_bert = _sentiment_percentages('sentiment', 'POSITIVE')
neutral_percentages_bert = _sentiment_percentages('sentiment', 'NEUTRAL')
negative_percentages_gem = _sentiment_percentages('gemini_sentiment', 'NEGATIVE')
positive_percentages_gem = _sentiment_percentages('gemini_sentiment', 'POSITIVE')
neutral_percentages_gem = _sentiment_percentages('gemini_sentiment', 'NEUTRAL')

print(negative_percentages_bert, positive_percentages_bert, neutral_percentages_bert, negative_percentages_gem, positive_percentages_gem, neutral_percentages_gem)


In [None]:
#find a few cases where their judgement differs, what do you think is the reason for the discrepancy? And which answer do you find more convincing?

Part 2: images

In [None]:
#download 10 images from the internet with a feature you're interested in studying. e.g. gender, race, age, action, etc.
images = ["image1.jpg", "image2.jpg", "image3.jpg", "image4.jpg", "image5.jpg", "image6.jpg", "image7.jpg", "image8.jpg", "image9.jpg", "image10.jpg"]
#ask the model to annotate the images with the features you're interested in studying
#choose 2 objective (clear right or wrong answer) questions and ask the model to answer them, like how many people are in the image, or what is the color of the object in the image
#choose 2 subjective (open to interpretation) questions and ask the model to answer them, like what is the mood of the person in the image or what race/gender is the person
annotation_prompt = "how many people are in this image? what is the color of the peoples hair in the image? what race are the ppeople in this image? what gender are the people in this image? "
for image_path in images:
    #open one file at a time (not the whole list) and send the opened image, not its filename;
    #the with-block closes the file once the request has been made
    with Image.open(image_path) as image:
        annotation = multimodal_model.generate_content([annotation_prompt, image])
    print(f"{image_path}: {annotation.text}")

In [None]:
#look through the responses. Is there anything you disagree with? What do you think is the reason for the discrepancy? Would you trust large scale results generated for this annotation? 

Part 3: Network Demo

In [None]:
!pip install networkx

In [2]:
import networkx as nx

In [5]:
#new graph
#starts out empty: nodes and edges are added in the following cells
G = nx.Graph()

In [None]:
#nodes can be added one at a time or in bulk from a list
G.add_node(1)
G.add_nodes_from([2, 3])
#can add additional attributes to the nodes
#each (node, dict) pair stores the dict's entries as attributes of that node
G.add_nodes_from([(4, {"color": "red"}), (5, {"color": "green"})])

In [None]:
#look up the attributes stored on node 4
G.nodes[4]

In [None]:
#all node ids currently in the graph
list(G.nodes)

In [11]:
# can manually add edges too
# this one connects nodes 1 and 2
G.add_edge(1, 2)


In [None]:
#number of edges currently in G
G.number_of_edges() 

In [21]:
#load edges from csv
import pandas as pd

#each row of the csv is one edge
edges = pd.read_csv("got-edges.csv")

#this replaces the small demo graph: Source/Target name the two endpoint columns
G = nx.from_pandas_edgelist(edges, source='Source', target='Target')

In [None]:
#number of edges in the current G
G.number_of_edges()

In [None]:
#visualize the graph

import matplotlib.pyplot as plt

#no node positions are passed, so nx.draw chooses the layout itself; with_labels prints each node's name
nx.draw(G, with_labels=True)

plt.show()




In [None]:
#calculate the density of the graph
#i.e. how many of the possible node pairs are actually connected (between 0 and 1 for this kind of graph)

nx.density(G)


In [None]:
#return highest degree nodes
#G.degree yields (node, degree) pairs; sort on the degree (x[1]), largest first

sorted(G.degree, key=lambda x: x[1], reverse=True)

In [28]:

#make dataframes with nodes and a column for each centrality measure
degree_scores = nx.degree_centrality(G)
df = pd.DataFrame({'node': list(degree_scores.keys()), 'degree': list(degree_scores.values())})

#add one column per remaining measure: betweenness, closeness, eigenvector
#values are appended positionally, in the order each measure returns the nodes
remaining_measures = (
    ('betweenness', nx.betweenness_centrality),
    ('closeness', nx.closeness_centrality),
    ('eigenvector', nx.eigenvector_centrality),
)
for column_name, measure in remaining_measures:
    df[column_name] = list(measure(G).values())





3a. Explore this dataframe: are there large differences between these types of centrality? What might cause them?

In [None]:
#display the centrality table (one row per node)
df

In [None]:
#calculate community structure
import networkx.algorithms.community as nxcom
#largest community first, so community 0 is the biggest
communities = sorted(nxcom.greedy_modularity_communities(G), key=len, reverse=True)

#add community to node features
#build the node -> community index lookup once instead of re-scanning df for every node
community_of = {node: i for i, community in enumerate(communities) for node in community}
#nodes of df that are in no community become NaN; kept as float like a column filled via .loc
df["community"] = df["node"].map(community_of).astype(float)

#color nodes by community
#scale the ids into [0, 1] for the colormap; with a single community the largest id is 0,
#so skip the division rather than produce 0/0 = NaN colors
max_community = df.community.max()
colors = df.community / max_community if max_community > 0 else df.community

#colors follows df's row order, which must match the node order of G
nx.draw(G, with_labels=True, node_color=colors, cmap=plt.cm.tab20)

plt.show()


Part 4: Make your own social network. Take a short excerpt of a novel, TV show, or movie, or a real-life social network you are familiar with. Make a CSV modelled on got-edges.csv with Source, Target, and Weight columns. You need to decide what constitutes a node and an edge; the easiest choice is characters or people as nodes, connected by edges weighted by their number of interactions. Type the data into the CSV manually, and include at least 25 edges.

What potential issues did you run into while converting it into a graph? Were there any ambiguities that made these decisions difficult?

Use either Gephi or NetworkX to calculate node centrality and community features, and add a visualization of the graph here. Does it align with your understanding of the media?

In [None]:
#hand-made edge list; the next cell reads its 'Source', 'Target' and 'Weight' columns
instagram = pd.read_csv('instagram_mutuals.csv')

In [None]:
#build the graph from the edge list, keeping Weight as an edge attribute
G = nx.from_pandas_edgelist(instagram, source='Source', target='Target', edge_attr=['Weight'])

#only the eigenvector measure is given the edge weights; the other two are called without them
degree_centrality = nx.degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
eigenvector_centrality = nx.eigenvector_centrality(G, weight='Weight')

#spring layout with a fixed seed, so node positions are the same on every run
pos = nx.spring_layout(G, seed=42)

#draw nodes, edges and labels as separate layers on one explicit axes
fig, ax = plt.subplots(figsize=(12, 10))
nx.draw_networkx_nodes(G, pos, ax=ax, node_size=700, node_color='skyblue')
nx.draw_networkx_edges(G, pos, ax=ax, width=2, alpha=0.7, edge_color='gray')
nx.draw_networkx_labels(G, pos, ax=ax, font_size=10, font_color='black')

ax.set_title('My Social Network on Instagram')
plt.show()

In [None]:
#print every measure the same way: a heading, then one "node: score" line per node (3 decimals)
centrality_reports = (
    ("Degree Centrality:", degree_centrality),
    ("\nBetweenness Centrality:", betweenness_centrality),
    ("\nEigenvector Centrality:", eigenvector_centrality),
)
for heading, scores in centrality_reports:
    print(heading)
    for node, centrality in scores.items():
        print(f"{node}: {centrality:.3f}")