In [9]:
import pandas as pd
import numpy  as np
import requests
import json
import time
from dotenv import load_dotenv
from tqdm import tqdm

# Load environment variables from .env file
load_dotenv()

False

In [10]:
'''
    converting the mobile rec csv file to parquet
'''

# the following code chunks the csv and then appends the chunks to convert them into parquet
# it reduces the size from 4.4 GB to 1.88 GB
'''
import pandas as pd
import pyarrow as pa
import pyarrow.parquet as pq

# Specify the path to your large CSV file
csv_file     = '../data/review/mobilerec_final.csv'

# Specify the path to the output Parquet file
parquet_file = '../data/review/mobilerec_final.parquet'

# Define the chunk size
chunk_size = 100000  # Adjust based on your memory capacity

# Initialize the Parquet writer
csv_iterator = pd.read_csv(csv_file, chunksize=chunk_size)

# Iterate over chunks and write to Parquet
for i, chunk in enumerate(csv_iterator):
    # Convert the DataFrame to an Arrow Table
    table = pa.Table.from_pandas(chunk)

    # For the first chunk, create a new Parquet file
    if i == 0:
        pqwriter = pq.ParquetWriter(parquet_file, table.schema)
    
    # Append subsequent chunks to the Parquet file
    pqwriter.write_table(table)

# Close the Parquet writer
pqwriter.close()
''';

<h1>LLM set-up</h1>

In [11]:
import google.generativeai as genai
import os

genai.configure(api_key=os.getenv("GEMINI_API_KEY"))
geminiModel = genai.GenerativeModel('gemini-1.5-pro')

from openai import OpenAI

client = OpenAI(
  api_key=os.environ['OPENAI_API_KEY'],  # this is also the default, it can be omitted
)

In [12]:
# setting gemini for extracting information from the games
# for conversation, we have some of the information such as ads, security, permissions

class LLM():
    def __init__(self, geminiModel):
        self.geminiModel = geminiModel

    def geminiResponse(self,prompt,responseType):
        response = self.geminiModel.generate_content(
                        prompt,
                        generation_config = genai.types.GenerationConfig(
                                candidate_count = 1,
                                top_p=0.5,
                                top_k=1,
                                temperature=0
                            )    
                        )
        
        return(response.text)

    def claudeResponse(self,prompt):
        api_url = "https://api.anthropic.com/v1/messages"
    
        headers = {
            "Content-Type": "application/json",
            "X-API-Key": os.getenv("CLAUDE_API_KEY"),
        }
    
        data = {
            "model": "claude-3-sonnet-20240229",
            "messages": [
                {"role": "user", "content": prompt}
            ],
            "max_tokens": 1000
        }
    
        response = requests.post(api_url, headers=headers, json=data)
        
        if response.status_code == 200:
            return response.json()['content'][0]['text']
        else:
            return f"Error: {response.status_code} - {response.text}"

    def OpenAIResponse(self,sys_prompt,user_prompt):
        response = client.chat.completions.create(
        model="gpt-3.5-turbo",
        messages=[
            {"role": "system", "content": sys_prompt},
            {"role": "user", "content": user_prompt}
            ],
            max_tokens=150,
            n=1,
            stop=None,
            temperature=0.7,
        )

        # Extract the response text
        response_text = response.choices[0].message.content
        return (response_text)
    
LLMModel = LLM(geminiModel)

<h1>Collecting information about games</h1>

In [13]:
# we collect all the information required from the description (marked by - -)
'''
(i) User Interface Design: from reviews
(ii) Navigation: from reviews
(iii) Accessibility: for whom the game is for
(iv) Customization: from review
(v) Functionality: from games - - -
(vi) Performance: from reviews (ratings from games)
(vii) Responsiveness: users
(viii) Security: from apps games data
(ix) Privacy: from apps games data 
(x) Permissions: from apps games data 
(xi) Data Collection: from apps games data 
(xii) Data Sharing: from apps games data 
(xiii) Updates; 
(xiv) Customer support;
(xv) Reviews and ratings: from apps games data 
(xvi) Developer: from apps games data 
(xvii) Price: from apps games data 
(xviii) In-app purchases: from apps games data  
(xix) Advertisement Frequency: from apps games data  or reviews
(xx) Battery Drainage: from reviews
(xxi) Summary of app:  - - -
(xxii) who would you recommend this type to:  - - -
(xxiii) adjectives for the app  - - -
''';

In [14]:
def GeminiPrompts(name,descript):
    prompt = f"""You are given an app store optimization specialist, with an expertise in optimizing app metadata, including descriptions, to improve app visibility and downloads.
    You are given the following name and the description of the app below.

    ------------------------
    Name of the app: {name}
    Description of the app: {descript}

    based on the description of the app, follow the step-by-step guide below:
    1-summarize what the mobile app is about within 250 words. sumamry should be informative and DO NOT ANY EXTERNAL INFORMATION
    2-decription may have features inforamtion. identify these main features of the app. ONLY if featuers are not givem, identify from the summary you created.
    3-find adjectives for the app which should attract users
    4-find 4-5 search keywords as who is the target audience for whom is the app designed for.

    Output in the following text format. Strictly follow the format. NO special characters.

    summary: summary of the app withinn 250 words
    features: main features of the app
    adjectives:best 4-5 adjectives for the app
    search_term:keywords of 4-5 target audience for whom is the app designed for/words that would atttract users to this app
    """
    return(prompt)

def OpenAIPrompts(name,descript):
    sys_prompt = f"""You are given an app store optimization specialist, with an expertise in optimizing app metadata, including descriptions, to improve app visibility and downloads.
    Based on the description of the app, follow the step-by-step guide below:
    1-summarize what the mobile app is about within 250 words. sumamry should be informative and DO NOT ANY EXTERNAL INFORMATION
    2-decription may have features inforamtion. identify these main features of the app. ONLY if featuers are not givem, identify from the summary you created.
    3-find adjectives for the app which should attract users
    4-find 4-5 search keywords as who is the target audience for whom is the app designed for.

    Output in the following text format. Strictly follow the format. NO special characters.

    summary: summary of the app withinn 250 words
    features: main features of the app
    adjectives:best 4-5 adjectives for the app
    search_term:keywords of 4-5 target audience for whom is the app designed for/words that would atttract users to this app

    Think carefully if you have all these fields or not, if not, make sure you dont miss any of summary, feature, adjectives or search_term
    """

    user_prompt = f"""
    You are given the following name and the description of the app below.
    ------------------------
    Name of the app: {name}
    Description of the app: {descript}
    """

    return(sys_prompt,user_prompt)

In [16]:
try:
    with open('../data/review/app_information_dict.json') as f:
        information_dict = json.load(f)

except:
    # contains the text
    information_dict = {}

    with open('../data/review/app_information_dict.json', 'w', encoding='utf-8') as f:
        json.dump(information_dict, f, indent=4)

# after reading the dict, check if it contains the app_package and store into from LLMs (in plain text now)
app_meta = pd.read_parquet("../data/review/app_meta.parquet")
count_g,count_o = 0,0

for r in tqdm(range(len(app_meta))):
    if r > 2000:
        break

    app_package = app_meta['app_package'].iloc[r]

    if app_package not in information_dict.keys():
        name     = app_meta['app_name'].iloc[r]
        descript = app_meta['description'].iloc[r]

        # first try gemini, if there is an error then go for chat gpt

        try:
            prompt = GeminiPrompts(name,descript)
            response = LLMModel.geminiResponse(prompt,None)
            information_dict[app_package] = response
            time.sleep(3)
            count_g += 1

        except:
            sys_prompt,user_prompt = OpenAIPrompts(name,descript)
            response = LLMModel.OpenAIResponse(sys_prompt,user_prompt)
            information_dict[app_package] = response
            time.sleep(2)
            count_o += 1

        with open('../data/review/app_information_dict.json', 'w', encoding='utf-8') as f:
            json.dump(information_dict, f, indent=4)

        print(count_g,count_o)
        

I0000 00:00:1723574400.936470 2435845 check_gcp_environment_no_op.cc:29] ALTS: Platforms other than Linux and Windows are not supported
 11%|█         | 1132/10173 [00:10<01:26, 104.81it/s]

0 1
1 1


 11%|█         | 1134/10173 [00:26<04:28, 33.67it/s] 

2 1


 11%|█         | 1135/10173 [00:35<06:51, 21.99it/s]

3 1


 11%|█         | 1136/10173 [00:40<08:48, 17.10it/s]

3 2


 11%|█         | 1137/10173 [00:46<11:39, 12.92it/s]

3 3


 11%|█         | 1138/10173 [00:50<15:12,  9.90it/s]

3 4


 11%|█         | 1139/10173 [00:55<20:34,  7.32it/s]

3 5


 11%|█         | 1139/10173 [00:59<07:54, 19.03it/s]


KeyboardInterrupt: 

In [17]:
name     = app_meta['app_name'].iloc[r]
descript = app_meta['description'].iloc[r]

prompt   = GeminiPrompts(name,descript)
response = LLMModel.geminiResponse(prompt,None)


'summary: Sketchfab is your portal to a vast library of 3D models, viewable in 3D, VR, and AR right on your mobile device. Explore historical wonders like Rome, examine intricate anatomical models, or stand in the presence of dinosaurs - all from the palm of your hand.  Interact with models through touch, immerse yourself in VR with a mobile headset, or use AR to bring models into your own environment.  Sketchfab allows you to connect with creators, explore curated collections, and share your favorite discoveries with friends. \n\nfeatures: \n- Explore millions of 3D models\n- View models in 3D, VR, and AR\n- Follow favorite creators\n- Explore by category or keyword search\n- Like, share, and comment on models\n- Subscribe to and create collections\n- Share your profile and discoveries with friends\n- View curated Staff Picks\n\nadjectives: Immersive, Interactive, Educational, Inspiring, Cutting-edge\n\nsearch_term: 3D models, VR, AR, 3D design, Augmented Reality \n'

In [18]:
print(response)

summary: Sketchfab is your portal to a vast library of 3D models, viewable in 3D, VR, and AR right on your mobile device. Explore historical wonders like Rome, examine intricate anatomical models, or stand in the presence of dinosaurs - all from the palm of your hand.  Interact with models through touch, immerse yourself in VR with a mobile headset, or use AR to bring models into your own environment.  Sketchfab allows you to connect with creators, explore curated collections, and share your favorite discoveries with friends. 

features: 
- Explore millions of 3D models
- View models in 3D, VR, and AR
- Follow favorite creators
- Explore by category or keyword search
- Like, share, and comment on models
- Subscribe to and create collections
- Share your profile and discoveries with friends
- View curated Staff Picks

adjectives: Immersive, Interactive, Educational, Inspiring, Cutting-edge

search_term: 3D models, VR, AR, 3D design, Augmented Reality 



In [31]:
import re

def extractInformation(text):
    fields = [""]*4  # four fields -- summary,features,adjectives,search_term
 
    # finding all the text between summary and features, doing the same for others as well
    start_marker = ["summary: ","features: ","adjectives: ","search_term: "]
    end_marker   = ["features:","adjectives","search_term"]
    len_prefix   = [9,10,12,13]

    for f in range(3):
        start_ind = text.index(start_marker[f])
        end_ind   = text.index(end_marker[f])
        fields[f] = text[start_ind+len_prefix[f]:end_ind]
    
    start_ind = text.index(start_marker[3])
    fields[3] = text[start_ind+len_prefix[-1]:]

    return(fields)

extracted_data = extractInformation(response)
print(extracted_data)


['Sketchfab is your portal to a vast library of 3D models, viewable in 3D, VR, and AR right on your mobile device. Explore historical wonders like Rome, examine intricate anatomical models, or stand in the presence of dinosaurs - all from the palm of your hand.  Interact with models through touch, immerse yourself in VR with a mobile headset, or use AR to bring models into your own environment.  Sketchfab allows you to connect with creators, explore curated collections, and share your favorite discoveries with friends. \n\n', '\n- Explore millions of 3D models\n- View models in 3D, VR, and AR\n- Follow favorite creators\n- Explore by category or keyword search\n- Like, share, and comment on models\n- Subscribe to and create collections\n- Share your profile and discoveries with friends\n- View curated Staff Picks\n\n', 'Immersive, Interactive, Educational, Inspiring, Cutting-edge\n\n', '3D models, VR, AR, 3D design, Augmented Reality \n']


In [32]:
print(extracted_data[0])
print(extracted_data[1])
print(extracted_data[2])
print(extracted_data[3])

Sketchfab is your portal to a vast library of 3D models, viewable in 3D, VR, and AR right on your mobile device. Explore historical wonders like Rome, examine intricate anatomical models, or stand in the presence of dinosaurs - all from the palm of your hand.  Interact with models through touch, immerse yourself in VR with a mobile headset, or use AR to bring models into your own environment.  Sketchfab allows you to connect with creators, explore curated collections, and share your favorite discoveries with friends. 



- Explore millions of 3D models
- View models in 3D, VR, and AR
- Follow favorite creators
- Explore by category or keyword search
- Like, share, and comment on models
- Subscribe to and create collections
- Share your profile and discoveries with friends
- View curated Staff Picks


Immersive, Interactive, Educational, Inspiring, Cutting-edge


3D models, VR, AR, 3D design, Augmented Reality 

