In [None]:
# Mount Google Drive at /content/drive; the dataset and image paths used in the
# cells below are all located under this mount point (Google Colab only).
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


# Fair Use Calculator: Model Training and Testing

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_absolute_error
from sklearn.ensemble import GradientBoostingRegressor

# Load the training and testing data
train_df = pd.read_csv('/content/drive/MyDrive/FairUseCalculatorData/Training Dataset.csv')
test_df = pd.read_csv('/content/drive/MyDrive/FairUseCalculatorData/Testing Dataset.csv')

TARGET_COL = 'Fair Use %'

# Derive the feature schema from the TRAINING data only. Text features are the
# object-dtype columns; numeric features are the int64/float64 columns minus
# the regression target.
text_cols_train = train_df.select_dtypes(include=["object"]).columns
numeric_cols_train = (
    train_df.select_dtypes(include=["int64", "float64"]).columns.drop(TARGET_COL)
)

# Reuse the training schema for the test set instead of re-inferring it from
# the test CSV's dtypes. pandas infers dtypes per file, so a column can come
# back with a different dtype in the two files; inferring the schema twice
# would then give the test matrix a different feature layout than the one the
# scaler and model are fitted on.
missing_in_test = [
    col for col in [*text_cols_train, *numeric_cols_train, TARGET_COL]
    if col not in test_df.columns
]
if missing_in_test:
    raise KeyError(
        f"Testing dataset is missing columns present in training: {missing_in_test}"
    )
text_cols_test = text_cols_train
numeric_cols_test = numeric_cols_train


def _join_text(frame, columns):
    """Concatenate the given columns of each row into one space-separated string."""
    return frame[columns].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)


train_df['combined_text'] = _join_text(train_df, text_cols_train)
test_df['combined_text'] = _join_text(test_df, text_cols_test)
all_text = pd.concat([train_df['combined_text'], test_df['combined_text']], axis=0)

# Mixed arrays: column 0 holds the combined text, the remaining columns hold
# the numeric features (so `[:, 1:]` below selects the numeric part).
X_train = np.hstack((
    train_df['combined_text'].to_numpy().reshape(-1, 1),
    train_df[numeric_cols_train],
))
X_test = np.hstack((
    test_df['combined_text'].to_numpy().reshape(-1, 1),
    test_df[numeric_cols_test],
))
y_train = train_df[TARGET_COL]
y_test = test_df[TARGET_COL]

# NOTE(review): the vectorizer is fitted on train AND test text, so the test
# set's vocabulary/IDF statistics leak into the features and the reported
# error is optimistic. Fitting on the training text alone would be cleaner,
# but it changes the number of TF-IDF columns and therefore the feature count
# every downstream cell has to match, so it is left unchanged here.
vectorizer = TfidfVectorizer(stop_words='english')
vectorizer.fit(all_text)
X_train_texts = vectorizer.transform(train_df['combined_text']).toarray()
X_test_texts = vectorizer.transform(test_df['combined_text']).toarray()

# Standardize numeric features using statistics from the training rows only.
scaler = StandardScaler()
scaler.fit(X_train[:, 1:])
X_train_nums_scaled = scaler.transform(X_train[:, 1:])
X_test_nums_scaled = scaler.transform(X_test[:, 1:])

# Final design matrices: TF-IDF columns first, scaled numeric columns last.
X_train_preprocessed = np.hstack((X_train_texts, X_train_nums_scaled))
X_test_preprocessed = np.hstack((X_test_texts, X_test_nums_scaled))

# Train the model
# Gradient-boosted regression trees on the preprocessed training matrix;
# random_state is pinned to 0. `fit` returns the estimator itself, so the
# fitted model can be bound in a single expression.
model = GradientBoostingRegressor(random_state=0).fit(X_train_preprocessed, y_train)

# Evaluate on the testing dataset: mean absolute error between the true and
# predicted 'Fair Use %' values.
predictions = model.predict(X_test_preprocessed)
mae = mean_absolute_error(y_true=y_test, y_pred=predictions)

print(mae)

33.35953380470592


# Image Match %

In [None]:
import cv2
from skimage.metrics import structural_similarity as ssim

def resize_image(image, target_size=(300, 300)):
    """Resize an image to a fixed size with ``cv2.resize``.

    Args:
        image: Image array to resize, passed straight through to ``cv2.resize``.
        target_size: Output size handed to ``cv2.resize`` as its second
            argument (OpenCV's ``dsize``, i.e. ``(width, height)``).
            Defaults to ``(300, 300)``.

    Returns:
        Whatever ``cv2.resize`` returns for the given image and size.
    """
    resized = cv2.resize(image, target_size)
    return resized

def _read_image(image_path):
    """Load an image with OpenCV, raising a descriptive error if it cannot be read."""
    import os  # local import: only needed to diagnose a failed read

    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports failure by returning None instead of raising, so
        # without this check the problem only surfaces later as an opaque
        # OpenCV assertion inside cv2.resize.
        if not os.path.exists(image_path):
            raise FileNotFoundError(f"Image file not found: {image_path}")
        raise ValueError(f"File could not be decoded as an image: {image_path}")
    return image


def image_similarity(image1_path, image2_path, target_size=(300, 300)):
    """Return the structural similarity of two image files as a percentage.

    Both images are loaded, resized to ``target_size`` so they have matching
    dimensions, converted from BGR to grayscale, and compared with SSIM.

    Args:
        image1_path: Path of the first image file.
        image2_path: Path of the second image file.
        target_size: Size both images are resized to before comparison.

    Returns:
        The SSIM index multiplied by 100.

    Raises:
        FileNotFoundError: If either path does not exist.
        ValueError: If a file exists but OpenCV cannot decode it as an image.
    """
    # Read both files first so a bad path fails before any processing is done.
    image1 = _read_image(image1_path)
    image2 = _read_image(image2_path)

    gray_image1 = cv2.cvtColor(resize_image(image1, target_size), cv2.COLOR_BGR2GRAY)
    gray_image2 = cv2.cvtColor(resize_image(image2, target_size), cv2.COLOR_BGR2GRAY)

    # Only the mean SSIM score is needed; the full per-pixel similarity map
    # (full=True) was computed and discarded before, so it is no longer requested.
    ssi_index = ssim(gray_image1, gray_image2)
    return ssi_index * 100

# Example comparison of two images stored on the mounted Drive.
image1_path = '/content/drive/MyDrive/P Final Help and Doc Pics/Sphere Select Change Size Of Sphere Pic.jpg'
image2_path = '/content/drive/MyDrive/P Final Help and Doc Pics/Sphere Select Pic.jpg'

# image_similarity returns the SSIM index scaled by 100; print it with two decimals.
match_p = image_similarity(image1_path, image2_path)
print(f"Similarity Percentage: {match_p:.2f}%")

Similarity Percentage: 52.75%


# User Input

In [None]:
import pandas as pd

# Questions asked in order. The answers (except the two image links) become
# the columns of the single-row DataFrame `df`.
prompts = [
    "Please describe your work",
    "Additional Details of Original Work",
    "How did you notice something was wrong?",
    "Use Environment",
    "Publication Status",
    "Impact on Market Value",
    "Market Competition",
    "Other relevant information",
    "Use of Key Portions",
    "Extent of Use",
    "Image 1 Link",
    "Image 2 Link"
]

# Prompts whose answers are stored as floats. "Match %" is not listed: it is
# never asked, it is computed from the two images below.
NUMERIC_PROMPTS = ("Use of Key Portions", "Extent of Use")
# Prompts whose answers are image paths rather than model features.
IMAGE_LINK_PROMPTS = ("Image 1 Link", "Image 2 Link")


def _ask_number(prompt):
    """Ask `prompt` repeatedly until the answer parses as a float, then return it."""
    while True:
        raw = input(f"{prompt}: ")
        try:
            return float(raw)
        except ValueError:
            # Re-ask instead of aborting the cell, which would discard every
            # answer the user has already typed.
            print(f"'{raw}' is not a number; please enter a numeric value.")


responses = {}
links = []

for prompt in prompts:
    if prompt in NUMERIC_PROMPTS:
        responses[prompt] = _ask_number(prompt)
    elif prompt in IMAGE_LINK_PROMPTS:
        # Strip stray whitespace from pasted paths so the file lookup does not
        # fail on an invisible trailing space or newline.
        links.append(input(f"{prompt}: ").strip())
    else:
        responses[prompt] = input(f"{prompt}: ")

# The image-match feature is derived from the two supplied images.
responses["Match %"] = image_similarity(links[0], links[1])

df = pd.DataFrame([responses])

Please describe your work: Drawing of a knight wielding a star shaped mace. This is a fictional charcter named Star Knight that I created. I posted this on my blog, reddit, and Instagram.
Additional Details of Original Work: 
How did you notice something was wrong?: I saw a version of my work posted on reddit. It was edited to match the artistic style of "Starry Night". The background was also changed to match Starry Night
Use Environment: Reddit
Publication Status: Published
Impact on Market Value: Makes a version of my character art available on one of the same websites that I originally posted it to.
Market Competition: Makes a version of my character art available on one of the same websites that I originally posted it to.
Other relevant information: They are using a highly edited version of my original work. The central figure is the same, but it has been re-colored, re-stylized, and put on top of a new background. The new style is a play on my character's name. Its comibination w

In [None]:
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.preprocessing import StandardScaler

text_columns = ["Please describe your work", "Additional Details of Original Work", "How did you notice something was wrong?", "Use Environment", "Publication Status", "Impact on Market Value", "Market Competition", "Other relevant information"]
num_columns = ["Use of Key Portions", "Extent of Use", "Match %"]

# One combined document per row, built the same way as `combined_text` in the
# training cell.
all_text = df[text_columns].apply(lambda row: ' '.join(row.values.astype(str)), axis=1)

# Transform with the vectorizer and scaler that were FITTED IN THE TRAINING
# CELL; do not fit new ones here. A TfidfVectorizer fitted on this single row
# would produce columns for this row's own vocabulary, which have no
# correspondence to the feature columns the model was trained on, and a
# StandardScaler fitted on one row maps every value to 0. Refitting would also
# overwrite the trained `vectorizer` / `scaler` globals.
X_text = vectorizer.transform(all_text).toarray()

# NOTE(review): assumes the training CSV's numeric feature columns are exactly
# `num_columns`, in this order -- verify against `numeric_cols_train`.
X_nums = df[num_columns].to_numpy(dtype=float)
if X_nums.shape[1] != scaler.n_features_in_:
    raise ValueError(
        f"Scaler was fitted on {scaler.n_features_in_} numeric features, "
        f"but the user input provides {X_nums.shape[1]}: {num_columns}"
    )
X_nums_scaled = scaler.transform(X_nums)

# Same layout as the training matrix: TF-IDF columns first, scaled numerics last.
X_preprocessed = np.hstack((X_text, X_nums_scaled))

df_preprocessed = pd.DataFrame(X_preprocessed)


In [None]:
# Pad the user row out to the number of features the trained model expects.
# The width is read from the fitted model rather than hard-coded, so it stays
# correct if the training data (and hence the TF-IDF vocabulary) changes.
expected_features = model.n_features_in_
num_empty_features = expected_features - df_preprocessed.shape[1]
if num_empty_features < 0:
    raise ValueError(
        f"User input has {df_preprocessed.shape[1]} features but the model "
        f"expects only {expected_features}"
    )

empty_data = np.full((len(df_preprocessed), num_empty_features), np.nan)
# Continue the column labels where df_preprocessed stops so the padded frame
# never ends up with duplicate column labels.
first_pad_label = df_preprocessed.shape[1]
empty_df = pd.DataFrame(
    empty_data,
    columns=range(first_pad_label, first_pad_label + num_empty_features),
    index=df_preprocessed.index,
)

# NOTE(review): the padding is filled with the most negative float32 --
# presumably because the regressor does not accept NaN. This is an extreme,
# out-of-distribution feature value; confirm it is really the intended filler.
placeholder_value = np.finfo(np.float32).min

empty_dfilled = empty_df.fillna(placeholder_value)
combined_df = pd.concat([df_preprocessed, empty_dfilled], axis=1)

In [None]:
# Predict 'Fair Use %' for the user-supplied row with the trained regressor.
pred = model.predict(combined_df)

print(pred)

[86.36929576]
