In [56]:
# This script will connect to Open AI
# Use the Scraped Data from Quora
# Generate new content based on the data collected from Quora

In [1]:
from openai import OpenAI
from openai import ChatCompletion
import tiktoken

In [2]:
# Set All Configuration Parameters Here

# ChatGPT Encoding Model
encoding = tiktoken.encoding_for_model("gpt-3.5-turbo")

# Set your OpenAI API key
client = OpenAI(api_key = '') # Enter your Open AI code


In [3]:
# FYI - Install these libraries if they does not exist
# pip install --upgrade openai
# pip install tiktoken
# pip install --upgrade tiktoken

In [4]:
import pandas as pd

# Read csv file
updated_quora_data = pd.read_csv('Quora_Scraped_Data.csv')

# Read from DataFrame and Prepare the text: Combination of the topic, question and the top 3 content

for index, row in updated_quora_data.iterrows():
        try:
            if row['Status'] == 'New':
                print('Check the content and verify it is not empty..')

                if row['Question'] is not None or row['SearchTerm'] is not None: 

                    print('Generating the AI content for number: '+ str(index) +' ...' )
                    
                    open_ai_prompt = f'''Rephrase the article below in the style of Quentin Tarantino. {row['Question']}'''

                    if len(str(row['TopContent1'])) > 50:
                        answer1 = f"Answer #1 is: {row['TopContent1']}"
                        open_ai_prompt = open_ai_prompt + answer1
                    if len(str(row['TopContent2'])) > 50:
                        answer2 = f"Answer #2 is: {row['TopContent2']}"
                        open_ai_prompt = open_ai_prompt + answer2  
                    if len(str(row['TopContent3'])) > 50:
                        answer3 = f"Answer #3 is: {row['TopContent3']}"
                        open_ai_prompt = open_ai_prompt + answer3

                    # Only generate data if there is data
                    if row['TopContent1'] is not None or row['TopContent2'] is not None or row['TopContent3'] is not None:
                        # Calculate number of tokens that the prompt will cost
                        num_tokens = len(encoding.encode(open_ai_prompt))
                        print('Num of Tokens: ' + str(num_tokens))
                        
                        if num_tokens is not None and num_tokens < 4000 :
                            # Pass the prompt to Open AI API to generate the Blog Content
                            completion = client.chat.completions.create(
                              model="gpt-3.5-turbo",
                              messages=[
                                {"role": "system", "content": "You are a content writer, skilled in writing blogs and articles with creative flair."},
                                {"role": "user", "content": open_ai_prompt }
                              ]
                            )
                            # Generated Content
                            generated_blog_content = completion.choices[0].message.content.strip()
                            #print(generated_blog_content)

                            # Find the index of the first occurrence of '\n\n' to locate the end of the title
                            title_end_index = generated_blog_content.find('\n\n')

                            # Extract the title
                            title = generated_blog_content[len('Title: '):title_end_index].strip()

                            # Extract the content 
                            content = generated_blog_content[title_end_index + len('\n\n'):].strip()

                            # Update Number of Tokens and Update DataFrame
                            updated_quora_data.loc[index, 'Prompt_Tokens'] = int(num_tokens)
                            updated_quora_data.loc[index, 'OPEN_AI_Prompt'] = open_ai_prompt
                            updated_quora_data.loc[index, 'AI_Generated_Blog_Post_Title'] = title
                            updated_quora_data.loc[index, 'AI_Generated_Blog_Post_Content'] = content
                            updated_quora_data.loc[index, 'Status'] = 'In_Review'

                            print('Generated the AI content!')
                        else: 
                            updated_quora_data.loc[index, 'Status'] = 'No_Content'
                            print('Content is Empty or too Long')
        except Exception as e:
            print('Error: ' + str(e))

Check the content and verify it is not empty..
Generating the AI content for number: 0 ...
Num of Tokens: 3297
Generated the AI content!
Check the content and verify it is not empty..
Generating the AI content for number: 1 ...
Num of Tokens: 5765
Content is Empty or too Long
Check the content and verify it is not empty..
Generating the AI content for number: 2 ...
Num of Tokens: 1565
Generated the AI content!
Check the content and verify it is not empty..
Generating the AI content for number: 3 ...
Num of Tokens: 3204
Generated the AI content!
Check the content and verify it is not empty..
Generating the AI content for number: 4 ...
Num of Tokens: 3795
Generated the AI content!
Check the content and verify it is not empty..
Generating the AI content for number: 5 ...
Num of Tokens: 1012
Generated the AI content!
Check the content and verify it is not empty..
Generating the AI content for number: 6 ...
Num of Tokens: 1416
Generated the AI content!
Check the content and verify it is not

In [5]:
updated_quora_data.head(5)

Unnamed: 0,Question,Link,NumFollow,NumUpvote,NumAnswer,SearchTerm,TopContent1,TopContent2,TopContent3,Status,TotalScore,Prompt_Tokens,OPEN_AI_Prompt,AI_Generated_Blog_Post_Title,AI_Generated_Blog_Post_Content
0,How can I become a data scientist?,https://www.quora.com/How-can-I-become-a-data-...,15400,0,1K answers,Data Science,"To get to this answer, let me spend a little t...",Most of the companies that are currently hirin...,tl;dr: Do a project you care about. Make it go...,In_Review,15400,3297.0,Rephrase the article below in the style of Que...,"about traditional paths, man. To unlock the do...","First up, you gotta know your basics. If you c..."
1,"What is the difference between Data Analytics,...",https://www.quora.com/What-is-the-difference-b...,4100,0,242 answers,Data Science,I had been wanting to take a stab at this one ...,"In today's data-driven world, it's essential t...","Data analytics, data analysis, data mining, da...",No_Content,4100,,,,
2,"What are the best, insightful blogs about data...",https://www.quora.com/What-are-the-best-insigh...,3400,0,77 answers,Data Science,Just look at the legendary Chuck Norris’s advi...,Popular CultureNate Silver: http://www.fivethi...,Exploring data science through blogs offers va...,In_Review,3400,1565.0,Rephrase the article below in the style of Que...,", here's the rephrased article in the style of...","Picture this, my fellow data enthusiasts, we'r..."
3,What is a data scientist's career path?,https://www.quora.com/What-is-a-data-scientist...,2400,0,299 answers,Data Science,Just look at the legendary Chuck Norris’s advi...,A profession in Data Science is worthwhile and...,A data science career in India is considered a...,In_Review,2400,3204.0,Rephrase the article below in the style of Que...,"realm of data science, the journey to success ...","First off, you gotta be sharp as a tack in bus..."
4,What is data science?,https://www.quora.com/What-is-data-science,2300,0,654 answers,Data Science,Data Science and Data Analytics is a field tha...,"(As of 22 July, 2016) I’ve just left an interv...",You might have come across the term “Data Scie...,In_Review,2300,3795.0,Rephrase the article below in the style of Que...,"opens with a close-up shot of a sleek, dimly-l...",What is this enigma known as Data Science? A f...


In [8]:
import numpy as np


# Generate a Blog Image based on the Blog created by Openai
open_ai_image_prompt = ''

for index,row in updated_quora_data.iterrows():
    if len(str(row['AI_Generated_Blog_Post_Title'])) > 5 and len(str(row['AI_Generated_Blog_Post_Content'])) > 5:
        try:
            open_ai_image_prompt = f'''I will provide you a Open AI Generated Blog Post Title, please 
            generate me a blog post picture.
            Blog Post Title is {row['AI_Generated_Blog_Post_Title']}'''

            response = client.images.generate(
              model="dall-e-3",
              prompt=open_ai_image_prompt,
              size="1024x1024",
              quality="standard",
              n=1,
            )

            image_url = response.data[0].url

            # Update DALL.E generated Image URL
            updated_quora_data.loc[index, 'Image_URL'] = image_url
            print(open_ai_image_prompt)
        except Exception as e:
            print('Error' + str(e))
    else:
        print('Content Data is missing. Image is not created.')
        

I will provide you a Open AI Generated Blog Post Title, please 
            generate me a blog post picture.
            Blog Post Title is about traditional paths, man. To unlock the door to becoming a data scientist, you gotta dive into a twisted journey through the intricate web of skills and knowledge. It ain't gonna be a walk in the park, but hey, nothing worth having ever came easy.
Content Data is missing. Image is not created.
ErrorError code: 400 - {'error': {'code': 'content_policy_violation', 'message': 'Your request was rejected as a result of our safety system. Your prompt may contain text that is not allowed by our safety system.', 'param': None, 'type': 'invalid_request_error'}}
I will provide you a Open AI Generated Blog Post Title, please 
            generate me a blog post picture.
            Blog Post Title is realm of data science, the journey to success ain't no straight line. It's a twisted path full of surprises and challenges, much like a Tarantino flick. You 

In [9]:
# Read csv file
scraped_quora_data = pd.read_csv('Quora_Scraped_Data.csv')

# Update existing CSV file 
for index, row in scraped_quora_data.iterrows(): 
    for index_updated, row_updated in updated_quora_data.iterrows():
        if row_updated['Link'] == row ['Link']:
            scraped_quora_data.loc[index, 'Prompt_Tokens'] = row_updated['Prompt_Tokens']
            scraped_quora_data.loc[index, 'OPEN_AI_Prompt'] = row_updated['OPEN_AI_Prompt']
            scraped_quora_data.loc[index, 'Status'] = row_updated['Status']
            scraped_quora_data.loc[index, 'AI_Generated_Blog_Post_Title'] = row_updated['AI_Generated_Blog_Post_Title']
            scraped_quora_data.loc[index, 'AI_Generated_Blog_Post_Content'] = row_updated['AI_Generated_Blog_Post_Content']
            scraped_quora_data.loc[index, 'AI_Generated_Image_URL'] = row_updated['Image_URL']

scraped_quora_data.to_csv('Quora_Scraped_Data.csv',mode = 'w', header = True, index = False)

In [10]:
import pandas as pd
import requests
from PIL import Image
import io

# Function to download an image and save it locally
def download_and_save_image(image_url, local_filename):
    try:
        # Download the image        
        response = requests.get(image_url)
        response.raise_for_status()

        # Convert image data to Pillow Image object
        image = Image.open(io.BytesIO(response.content))

        # Save the image locally as PNG
        png_filename = local_filename + '.jpeg'  # Add '.png' extension to the filename
        image.save(png_filename, format='JPEG')

        print(f"Image saved as {png_filename}.")
        
    except Exception as e:
        print(f"Error: {str(e)}")
        
scraped_image_urls = pd.read_csv('Quora_Scraped_Data.csv')

for index,row in scraped_image_urls.iterrows():
    if row['AI_Generated_Image_URL'] is not np.nan:
        image_url = row['AI_Generated_Image_URL']
        file_name_temp = row['Link']
        file_name = file_name_temp.replace("https://www.quora.com/",'')
        download_and_save_image(image_url, file_name)
        scraped_image_urls.loc[index,'Pic_File_Name'] = file_name + '.jpeg'
        print(file_name)

scraped_image_urls.to_csv('Quora_Scraped_Data.csv',mode = 'w', header = True, index = False)

Image saved as How-can-I-become-a-data-scientist.jpeg.
How-can-I-become-a-data-scientist
Image saved as What-is-a-data-scientists-career-path.jpeg.
What-is-a-data-scientists-career-path
Image saved as What-is-data-science.jpeg.
What-is-data-science
Image saved as What-are-good-ways-to-get-started-with-data-science-for-a-complete-novice.jpeg.
What-are-good-ways-to-get-started-with-data-science-for-a-complete-novice
Image saved as How-do-I-prepare-for-a-data-scientist-interview.jpeg.
How-do-I-prepare-for-a-data-scientist-interview
Image saved as Which-institute-is-the-best-for-the-data-science-course-in-Delhi.jpeg.
Which-institute-is-the-best-for-the-data-science-course-in-Delhi
Image saved as Which-institute-is-best-for-a-data-science-course-in-Bangalore.jpeg.
Which-institute-is-best-for-a-data-science-course-in-Bangalore
Image saved as Which-is-the-best-institute-for-data-scientist-courses.jpeg.
Which-is-the-best-institute-for-data-scientist-courses
Image saved as What-are-some-of-the-