# Prompt Analysis
Using NLP to determine which prompts our network got wrong to better understand which prompts are best for each diffusion model.

## Import Libraries
Import all necessary libraries

In [41]:
from collections import Counter, defaultdict
import nltk
import numpy as np
import pandas as pd
import torch
import torch.nn as nn

# For using NLTK later
nltk.download('punkt')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /home/cpondoc/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /home/cpondoc/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


True

## Import Models
For this case, we only import ResNet-18.

In [2]:
from models.transferlearning import load_pretrained_model

## Import Dataset Class and Transforms
Used to help load in the images for heatmap generation

In [3]:
from utilities.dataset import ImageDataset
from utilities.transforms import data_transforms

## Function to Determine Confusion Matrix
The same framework as testing through the model, and then looking at prediction and output.

In [30]:
def generate_confusion_matrix(transform, weights_path, batch_size, network, first, second):
    # Loading in initial data
    print("\nLoading in data...")
    test_data = ImageDataset("test", transform, 0.6, first, second)
    testloader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                            shuffle=True)
    print("Done loading in data.")

    # Test Loading
    dataiter = iter(testloader)
    images, labels, img_names = next(dataiter)

    # Loading in a new example of the neural net, and loading in the weights
    net = network
    if (torch.cuda.is_available()):
        net.to('cuda')
        net.load_state_dict(torch.load(weights_path))
    else:
        net.load_state_dict(torch.load(weights_path, map_location=torch.device('cpu')))
    
    # Generate confusion matrix -- also count all real but fake images, and all images
    print("Generate Confusion Matrix")
    matrix = [[0, 0], [0, 0]]
    real_but_fake = []
    fake_and_fake = []
    all_images = []
    
    # Perform same as testing/inferencing, but logging data.
    with torch.no_grad():
        for data in testloader:
            images, labels, paths = data
            for i in range(len(paths)):
                all_images.append(paths[i])
                
            # Case for having a GPU available
            images_cuda, labels_cuda = images, labels
            if (torch.cuda.is_available()):
                images_cuda, labels_cuda = images.cuda(), labels.cuda()
            
            # Feed the images through our network and evaluate them
            outputs = net(images_cuda)
            _, predicted = torch.max(outputs.cpu().data, 1)
            
            # Calculate the right part of the matrix of prediction and actual
            for i in range(len(predicted)):
                prediction = int(predicted[i].item())
                actual = int(labels[i].item())
                matrix[prediction][actual] += 1
                
                # Adding to set of images to further analyze if real, but fake
                if (prediction is 1 and actual is 0):
                    real_but_fake.append(paths[i])
                elif (prediction is 0 and actual is 0):
                    fake_and_fake.append(paths[i])
    
    return matrix, real_but_fake, fake_and_fake, all_images

## Main Function
Runs all of the necessary functions!

In [31]:
def main(model_type, weights, first, second):
    # Generate the necessary heatmaps
    model = load_pretrained_model()
    matrix, real_but_fake, fake_and_fake, all_images = generate_confusion_matrix(data_transforms[model_type], weights, 200, model, first, second)
    
    return matrix, real_but_fake, fake_and_fake, all_images

## Function to Calculate Precision and Recall
In addition to accuracy, we calculate precision = $\frac{\text{true positives}}{\text{false positives + true positives}}$, as well as recall = $\frac{\text{true positives}}{\text{false negatives + true positives}}$.

In [32]:
def precision_and_recall(matrix):
    precision = float(matrix[1][1] / (matrix[1][1] + matrix[1][0]))
    recall = float(matrix[1][1] / (matrix[1][1] + matrix[0][1]))
    return precision, recall

## Run all code!
Runs all of the code for Transfer Learning.

In [33]:
# Get and print confusion matrix
matrix, real_but_fake, fake_and_fake, all_images = main(model_type = "TransferLearning", weights = 'weights/TransferLearning/dalle/TransferLearning-0.6.pth', first='dalle', second='real')
print(matrix)

# Get and print precision and recall
precision, recall = precision_and_recall(matrix)
print("Precision: " + str(precision))
print("Recall: " + str(recall))


Loading in data...
Done loading in data.
Generate Confusion Matrix
[[280, 11], [8, 277]]
Precision: 0.9719298245614035
Recall: 0.9618055555555556


## Function for Total Number of Nouns + Length
Using NLTK to calculate total number of nouns and length (number of words).

In [34]:
def nouns_and_tokens(description):
    tokens = nltk.word_tokenize(description)
    tagged = nltk.pos_tag(tokens)
    counts = Counter(tag for word,tag in tagged)
    num_nouns = counts['NN'] + counts['NNS'] + counts['NNP'] + counts['NNPS']
    return num_nouns, tokens

## Function for Linguistic Analysis
Using the above function by looking at a specific dataset.

In [35]:
def linguistic_analysis(all_images):
    # Used to refer to mappings
    df = pd.read_csv('dataset/reference.csv')
    
    # Keeping track of total nouns and lengths
    total = 0
    all_nouns = []
    all_lengths = []
    
    # Iterate through each imags and calculate
    for img in all_images:
        # Grab the description
        index = int(img[-9:-4])
        description = df.iloc[index]['description']
        
        # Update nouns and tokens variables
        num_nouns, tokens = nouns_and_tokens(description)
        total += num_nouns
        all_nouns.append(num_nouns)
        all_lengths.append(len(tokens))
    
    # Print out statistics on number of nouns
    print("Number of Nouns:")
    print("Mean: " + str(np.mean(all_nouns)))
    print("Variance: " + str(np.var(all_nouns)))
    print("")
    
    # Print out statistics on lengths
    print("Length of Message:")
    print("Mean: " + str(np.mean(all_lengths)))
    print("Variance: " + str(np.var(all_lengths)))
    print("")
    
    return all_lengths, all_nouns

## Function to Print Specific Nouns
We can use this information to suggest an adversarial dataset.

In [44]:
def specific_nouns(all_images):
    # Used to refer to mappings
    df = pd.read_csv('dataset/reference.csv')
    
    # Keeping track of all nouns and the tags for nouns
    all_nouns, nouns_map = [], defaultdict(int)
    noun_tags = ['NN', 'NNS', 'NNP', 'NNPS']
    
    # Iterate through each image
    for img in all_images:
        # Get the description
        index = int(img[-9:-4])
        description = df.iloc[index]['description']
        #print(description)
        
        # Get the tokens and their tags
        tokens = nltk.word_tokenize(description)
        tagged = nltk.pos_tag(tokens)
        
        # Add to set if a noun
        for word, tag in tagged:
            if (tag in noun_tags):
                all_nouns.append(word)
                nouns_map[word] += 1
                
    return all_nouns, dict(sorted(nouns_map.items(), key=lambda item: item[1]))

## Applying Linguistic Analysis Functions
Calling above functions to print out data!

In [37]:
print("Analyzing All Images:")
all_lengths, all_nouns = linguistic_analysis(all_images)

print("\nAnalyzing Real, but Fake Images:")
rbf_lengths, rbf_nouns = linguistic_analysis(real_but_fake)

print("\nAnalyzing Fake Images Classified Correctly:")
faf_lengths, faf_nouns = linguistic_analysis(fake_and_fake)

Analyzing All Images:
Number of Nouns:
Mean: 5.826388888888889
Variance: 4.553192515432098

Length of Message:
Mean: 19.416666666666668
Variance: 39.31944444444444


Analyzing Real, but Fake Images:
Number of Nouns:
Mean: 5.0
Variance: 3.25

Length of Message:
Mean: 17.625
Variance: 11.234375


Analyzing Fake Images Classified Correctly:
Number of Nouns:
Mean: 5.85
Variance: 4.5703571428571435

Length of Message:
Mean: 19.46785714285714
Variance: 40.02753826530613



## Super Awesome Bootstrapping Techniques!
Ensuring statistical significance to make claims.

In [38]:
def bootstrapping(total_observations, subsection_of_interest):
    sample_mean = np.mean(total_observations)
    mean_difference = abs(sample_mean - np.mean(subsection_of_interest))
    subsection_length = len(subsection_of_interest)
    count = 0.0
    iteration_count = 10000
    for _ in range(iteration_count):
        sampled_lengths = np.random.choice(total_observations, subsection_length, replace=True)
        if abs(np.mean(sampled_lengths) - sample_mean) >= mean_difference:
            count += 1
    print(count / iteration_count)

## Calculating Statistical Significance
Calling above function!

In [39]:
print("Running simple bootstrapping to test against null hypothesis")
print("Statistical significance of prompt lengths")
bootstrapping(all_lengths, rbf_lengths)
print("Statistical significance of noun counts")
bootstrapping(all_nouns, rbf_nouns)

Running simple bootstrapping to test against null hypothesis
Statistical significance of prompt lengths
0.4298
Statistical significance of noun counts
0.2728


## Looking at Nouns in Real, but Fake
See motivation above!

In [45]:
print("Looking at nouns of real, but fake images set:")
all_nouns, nouns_map = specific_nouns(real_but_fake)
print(nouns_map)

print("Looking at nouns of correctly classified fake images:")
all_nouns, nouns_map = specific_nouns(fake_and_fake)
print(nouns_map)

Looking at nouns of real, but fake images set:
{'ocean': 1, 'Groom': 1, 'tux': 1, 'bride': 1, 'dress': 1, 'flowers': 1, 'grass': 1, 'stick': 1, 'mouth': 1, 'woman': 1, 'rope': 1, 'bridge': 1, 'trees': 1, 'people': 1, 'end': 1, 'adults': 1, 'sit': 1, 'homey': 1, 'porch': 1, 'front': 1, 'flora': 1, 'nature': 1, 'scene': 1, 'toddler': 1, 'grin': 1, 'female': 1, 'boy': 1, 't-ball': 1, 'day': 1, 'dad': 1, 'watching': 1, 'fence': 1, 'men': 1, 'apartment': 1, 'board': 1, 'game': 1, 'man': 1, 'watches': 1, 'dog': 2}
Looking at nouns of correctly classified fake images:
{'figures': 1, 'ages': 1, 'wooden': 1, 'cross': 1, 'presses': 1, 'desserts': 1, 'tomatoes': 1, 'market': 1, 'buildings': 1, 'shops': 1, 'washers': 1, 'ladders': 1, 'rings': 1, 'spectators': 1, 'toys': 1, 'Woman': 1, 'pad': 1, 'dirty': 1, 'works': 1, 'remodeling': 1, 'project': 1, 'spotlight': 1, 'semi-circle': 1, 'conductor': 1, 'shadow': 1, 'drum': 1, 'overalls': 1, 'stuffed': 1, 'animal': 1, 'drinks': 1, 'horses': 1, 'sunset':