# ENIAC ANALYSIS | GOOGLE GEMINI AI

## Import Libraries

In [8]:
import numpy as np
import pandas as pd
import time
import re
import vertexai
from pathlib import Path
from vertexai.generative_models import GenerativeModel, ChatSession

# Lift the column limit so DataFrames are shown with every column.
pd.set_option('display.max_columns', None)

## Import Data

In [9]:
# Directory containing the cleaned CSV files. It is kept as a plain string
# ending in '/' so that file names can be appended to it directly.
folder = 'C:/Users/Patrick/OneDrive/Data Science/WBS Coding School/Bootcamp/02_Bootcamp/03_Week-03_Data-Cleaning&Storytelling/Python/cleaned_data/'
"""
-----List of CSVs:-----
brands.csv
orderlines.csv
orders.csv
products.csv
"""
# NOTE: the list above gives short names; the files actually read below are
# brands.csv plus the '*_cleaned_Completed.csv' variants of the other three.

# Load each table into its own DataFrame.
brands = pd.read_csv(f'{folder}brands.csv')
orderlines = pd.read_csv(f'{folder}orderlines_cleaned_Completed.csv')
orders = pd.read_csv(f'{folder}orders_cleaned_Completed.csv')
products = pd.read_csv(f'{folder}products_cleaned_Completed.csv')

## Initialize additional description column

In [10]:
# Add an empty 'desc2' column to `products`, filled with NaN.
#
# - `np.nan` is used instead of `np.NaN`: the capitalised alias was removed
#   in NumPy 2.0.
# - The column is created with object dtype rather than float64, so that
#   string values can be assigned to it without the "incompatible dtype"
#   deprecation that pandas raises when strings are written into a float
#   column.
products['desc2'] = pd.Series(np.nan, index=products.index, dtype=object)

# Show five random rows to confirm the new column is present and empty.
products.sample(5)

Unnamed: 0,sku,name,desc,unit_price,in_stock,type,type_var,desc2
4763,APP1652-A,Open - Apple iPhone 6s Plus 32GB Space Gray,New iPhone 6S Plus 32GB free with the open box,639.0,0,24811716,93,
988,BOS0028,Bose Headphones White IE SoundTrue,Headphones great restraint iPhone iPod and iPad.,129.95,0,5384,6,
1692,PAC1841,Synology DS216J | 12TB (2x6TB) Seagate IronWolf,NAS server with 12TB capacity (2x6TB) Seagate ...,735.97,0,12175397,31,
1813,BEL0196,Belkin Car Charger 1A Black,Car Charger 5W 1A USB port for universal iPhone.,9.99,1,13615399,57,
593,BEL0172,Belkin Slim-Fit Armband Plus iPhone 6 / 6S Rosa,Bracelet perfect fit for iPhone 6 / 6S.,24.99,0,5405,46,


## Connect to Gemini AI

In [258]:
# Google Cloud project and region handed to the Vertex AI SDK.
# NOTE(review): these values look like placeholders - presumably they must be
# replaced with a real project name and location before running.
project_id = "GCP_PROJECT_NAME"
location = "GCP_LOCATION"

# Initialise the SDK for that project/location, create the Gemini model
# object and start one chat session on it.
vertexai.init(location=location, project=project_id)
model = GenerativeModel("gemini-1.0-pro")
chat = model.start_chat()

def get_chat_response(chat: ChatSession, prompt: str) -> str:
    """Send *prompt* through *chat* and return the reply as a single string.

    The message is sent with ``stream=True``; the ``text`` attribute of each
    streamed chunk is concatenated in the order the chunks are received.

    Args:
        chat: Chat session used to send the message.
        prompt: Text of the message to send.

    Returns:
        The concatenated text of all response chunks.
    """
    streamed_chunks = chat.send_message(prompt, stream=True)
    return "".join(piece.text for piece in streamed_chunks)

# Instruction that is placed in front of every description list sent to Gemini.
# Earlier prompt variant, kept for reference:
# str_init = "give me 3 product-keywords that describe items in the following list (don't include numbers). just return the keywords, nothing else. "
str_init = "Tell me with 3 keywords which products the following list is about. Tell me just the product-types"

# Matches every character that is not a word character, a comma or whitespace.
# Written as a raw string so that \w and \s are regex escapes and not (invalid)
# Python string escapes, and compiled once because it is reused in every
# iteration of the loop below.
non_keyword_chars = re.compile(r"[^\w,\s]")

# Rows that do not have a generated description yet; only their type_var
# values are processed.
df_ref = products[products['desc2'].isna()]

# NOTE(review): every prompt goes through the same `chat` session, so earlier
# questions and answers presumably stay in the conversation history and may
# influence later answers - confirm whether one session per type is wanted.
for i in sorted(df_ref['type_var'].unique()):
    # Boolean mask of all products that share this type_var.
    rowselect = products['type_var'] == i

    # Use the first three descriptions of the group as examples.
    df = products.loc[rowselect, 'desc'].head(3)

    # Flatten the examples into one string; the list brackets and quotes are
    # removed by the substitution below.
    desc = str(list(df))

    # Separate the instruction from the examples with ": " (previously the two
    # were glued together) and cap the cleaned examples at 1000 characters.
    prompt = str_init + ": " + non_keyword_chars.sub("", desc)[:1000]

    response = get_chat_response(chat, prompt)

    # Store the same answer on every product of this type_var.
    products.loc[rowselect, 'desc2'] = response

    # Wait five seconds between requests.
    time.sleep(5.0)

## Test results

In [302]:
# Spot check: type_var 23, 24 and 41 are hard drives, so their generated
# keywords should describe storage products. Show ten random rows of them.
hard_drive_types = [23, 24, 41]
is_hard_drive = products['type_var'].isin(hard_drive_types)
products.loc[is_hard_drive, ['name', 'desc2', 'type_var']].sample(10)

Unnamed: 0,name,desc2,type_var
921,G-Technology G-Drive ev ATC Thunderbolt Hard D...,"External hard drives, Mac accessories, PC acce...",23
363,LaCie d2 Quadra 4TB External Hard Drive USB 3....,"External hard drives, Mac accessories, PC acce...",23
4941,WD Blue 250GB SATA SSD Nand 3D,"SSD upgrades, MacBook Air accessories, Compute...",41
92,SSD expansion kit OWC Aura Pro 6G 240GB MacBoo...,"SSD upgrades, MacBook Air accessories, Compute...",41
2210,BAR Samsung 128GB USB 3.0 Flash Drive Up to 13...,"External hard drives, Mac accessories, PC acce...",23
4075,OWC Mercury Electra 6G SSD 250GB,"SSD upgrades, MacBook Air accessories, Compute...",41
264,LaCie Porsche Design P9223 Slim 500GB HDD,"External hard drives, Mac accessories, PC acce...",23
1544,OWC ThunderBay IV mini Thunderbolt 2 RAID,"External hard drives, Mac accessories, PC acce...",23
73,Envoy OWC USB 3.0 Case for MacBook Air SSD 201...,"External hard drives, Mac accessories, PC acce...",23
2664,OWC SSD Disk 240GB 6G Neptune 7mm SATA 3,"SSD upgrades, MacBook Air accessories, Compute...",41


In [306]:
# Count the products that still have no generated description.
missing_desc2 = products['desc2'].isna()
missing_desc2.sum()

0

In [308]:
# Keep only the type identifiers together with the generated keywords.
keyword_columns = ['type', 'type_var', 'desc2']
type_to_keywords = products[keyword_columns]

# Write the mapping next to the input data, without the index column.
export_path = folder + 'type_to_keywords.csv'
type_to_keywords.to_csv(export_path, index=False)

## Export results

In [312]:
# Read the exported mapping back and reduce it to one row per type_var,
# taking desc2 through the 'first' aggregation of each group.
filename = 'type_to_keywords.csv'
keywords_per_product = pd.read_csv(folder + filename)
keywords_by_type = keywords_per_product.groupby('type_var')
df_gemini_desc = keywords_by_type.agg({'desc2': 'first'})



Unnamed: 0_level_0,desc2
type_var,Unnamed: 1_level_1
0,"Laptop accessories, Keyboard accessories, Trac..."
1,"Keyboards, Keypads, Mac accessories"
2,"Mice, Mouse accessories, Computer accessories"
3,"iPhone accessories, iPod accessories, iPad acc..."
4,"Mac accessories, Memory, RAM"
...,...
118,"Smartphones, Apple products, iPhone accessories"
119,"Desktop computers, Mac computers, Refurbished ..."
120,"Tablets, Apple products, iPad accessories"
121,"Smart home devices, HomeKit accessories, Sensors"
