

# 1. Setup

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
pip install transformers



In [None]:
pip install pypdf

Collecting pypdf
  Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pypdf
Successfully installed pypdf-4.2.0


In [None]:
import pandas as pd
from pypdf import PdfReader
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import string
import re
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

# 2. Preprocess

In [None]:
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path`` as one string.

    Pages are joined with a newline. Joining with an empty string fused the
    last word of a page with the first word of the next one, which corrupted
    tokens at every page break. ``or ""`` keeps the join safe should a page
    yield a falsy (e.g. ``None``) extraction result.
    """
    reader = PdfReader(file_path)
    return "\n".join((page.extract_text() or "") for page in reader.pages)

def preprocess_text(text):
    """Normalize raw text into a space-separated keyword string.

    Steps: lowercase, replace every non-letter with a space, tokenize, drop
    NLTK English stop words, POS-tag the remainder and discard tokens tagged
    DT, IN, TO, PRP or WP.

    Returns:
        dict: ``{'feature': <cleaned text>}``.
    """
    dropped_tags = ['DT', 'IN', 'TO', 'PRP', 'WP']
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    english_stops = set(stopwords.words("english"))

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token in english_stops:
            continue
        kept_tokens.append(token)

    survivors = []
    for token, pos in pos_tag(kept_tokens):
        if pos not in dropped_tags:
            survivors.append(token)

    return {'feature': " ".join(survivors)}

def process_resume_data(df, data_dir="/content/drive/MyDrive/JM/data"):
    """Attach the cleaned resume text to one row of the resume table.

    Meant for ``DataFrame.apply(..., axis=1)``: ``df`` is a single row that
    provides ``ID`` and ``Category``. The PDF at
    ``{data_dir}/{Category}/{ID}.pdf`` is read, preprocessed and stored in the
    row under ``Feature``.

    Args:
        df: Row with ``ID`` and ``Category`` entries; it is modified in place.
        data_dir: Root folder holding one sub-folder of PDFs per category.
            Defaults to the previously hard-coded Drive location.

    Returns:
        The same row, now carrying a ``Feature`` entry.
    """
    resume_id = df['ID']  # renamed so the built-in ``id`` is not shadowed
    category = df['Category']
    pdf_path = f"{data_dir}/{category}/{resume_id}.pdf"
    df['Feature'] = preprocess_text(extract_text_from_pdf(pdf_path))['feature']
    return df

# Tokenizers already loaded, keyed by checkpoint name, so that embedding many
# texts does not reload the tokenizer from disk/hub on every single call.
_TOKENIZER_CACHE = {}


def get_embeddings(text, model_name):
    """Embed ``text`` as the mean of the model's last hidden states.

    Args:
        text: Input to embed; it is converted with ``str()`` first.
        model_name: Checkpoint name used to load the tokenizer. The encoder
            itself is the notebook-level ``model`` running on ``device``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over ``dim=1``, moved to
        the CPU. A single text is encoded per call, so there is one row.
    """
    cached_tokenizer = _TOKENIZER_CACHE.get(model_name)
    if cached_tokenizer is None:
        cached_tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = cached_tokenizer

    inputs = cached_tokenizer(str(text), return_tensors="pt", truncation=True, padding=True).to(device)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).to("cpu").numpy()

def print_top_matching_resumes(result_group, num_jobs=15):
    """Print the matched resumes for job ids ``0 .. num_jobs - 1``.

    Args:
        result_group: Result of grouping the match table by ``jobId``; each
            group must have ``similarity``, ``domainResume`` and
            ``domainDesc`` columns.
        num_jobs: How many leading job ids to report (default 15, the value
            that used to be hard-coded).

    Job ids without a group are skipped. ``get_group`` would otherwise raise
    ``KeyError`` and abort the report.
    """
    shown_columns = ['similarity', 'domainResume', 'domainDesc']
    for job_id in range(num_jobs):
        if job_id not in result_group.groups:
            continue
        print("\nJob ID:", job_id)
        print("Cosine Similarity | Domain Resume | Domain Description")
        print(result_group.get_group(job_id)[shown_columns])

In [None]:
# Build the resume table in one pass: load the CSV, drop the HTML column,
# add the cleaned PDF text to every row, then drop the raw resume string.
resume_data = (
    pd.read_csv("/content/drive/MyDrive/JM/Resume.csv")
    .drop(["Resume_html"], axis=1)
    .apply(process_resume_data, axis=1)
    .drop(columns=['Resume_str'])
)
# Save the result so later cells can reload it without re-reading the PDFs.
resume_data.to_csv("/content/drive/MyDrive/JM/resume_data.csv", index=False)

In [None]:
job_description = pd.read_csv("/content/drive/MyDrive/JM/training_data.csv")
# .copy() makes the two-column selection an independent frame, so adding the
# 'Features' column below cannot trigger pandas' SettingWithCopyWarning.
job_description = job_description[["job_description", "position_title"]].copy()
# Same cleaning as the resumes, so both sides are embedded from comparable text.
job_description['Features'] = job_description['job_description'].apply(lambda x : preprocess_text(x)['feature'])
job_description.to_csv("/content/drive/MyDrive/JM/job_data.csv", index=False)
job_description

Unnamed: 0,job_description,position_title,Features
0,minimum qualifications\nbachelors degree or eq...,Sales Specialist,minimum qualifications bachelors degree equiva...
1,description\nas an asc you will be highly infl...,Apple Solutions Consultant,description asc highly influential growing min...
2,its an amazing time to be joining netflix as w...,Licensing Coordinator - Consumer Products,amazing time joining netflix continue transfor...
3,description\n\nweb designers looking to expand...,Web Designer,description web designers looking expand profe...
4,at trackfive weve got big goals were on a miss...,Web Developer,trackfive weve got big goals mission revolutio...
...,...,...,...
848,job description\n\nparttime\n\nmake big money ...,Management Internship,job description parttime make big money menard...
849,responsibilities\nparkers internship program w...,Human Resources Internship - Corporate (Year-...,responsibilities parkers internship program es...
850,the borgen project is an innovative national ...,Writer / Journalist Internship,borgen project innovative national campaign wo...
851,put the world on vacation\n\nat wyndham destin...,Inbound Customer Service / Sales (Remote),put world vacation wyndham destinations missio...


# 3. Embedding Using BERT

In [None]:
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "bert-base-uncased"
# Tokenizer and encoder are loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Move the model to `device`; as the cell's last expression this also
# displays the model structure.
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [None]:
# Embed each preprocessed job description, stack the per-text results and
# drop the singleton axes before saving the matrix to Drive.
job_vectors = [get_embeddings(desc, model_name) for desc in job_description['Features']]
job_desc_embeddings = np.squeeze(np.array(job_vectors))
np.save("/content/drive/MyDrive/JM/temp/job_embedding", job_desc_embeddings)

In [None]:
# Embed every preprocessed resume and drop the singleton axes.
resume_embeddings = np.array([get_embeddings(text, model_name) for text in resume_data['Feature']]).squeeze()
# Save the resume vectors themselves. This line used to write
# job_desc_embeddings into the resume file, so the saved "resume" embeddings
# were really a copy of the job embeddings.
np.save("/content/drive/MyDrive/JM/temp/resume_embedding", resume_embeddings)

In [None]:
# Reload the job table that the preprocessing cell wrote to job_data.csv.
# df_load is not referenced again in the visible cells.
df_load = pd.read_csv('/content/drive/MyDrive/JM/job_data.csv')

In [None]:
# Load the source job-posting table (training_data.csv).
# df_load2 is not referenced again in the visible cells.
df_load2 = pd.read_csv('/content/drive/MyDrive/JM/training_data.csv')

Unnamed: 0,company_name,job_description,position_title,description_length,model_response
0,Google,minimum qualifications\nbachelors degree or eq...,Sales Specialist,2727,"{\n ""Core Responsibilities"": ""Responsible fo..."
1,Apple,description\nas an asc you will be highly infl...,Apple Solutions Consultant,828,"{\n ""Core Responsibilities"": ""as an asc you ..."
2,Netflix,its an amazing time to be joining netflix as w...,Licensing Coordinator - Consumer Products,3205,"{\n ""Core Responsibilities"": ""Help drive bus..."
3,Robert Half,description\n\nweb designers looking to expand...,Web Designer,2489,"{\n ""Core Responsibilities"": ""Designing webs..."
4,TrackFive,at trackfive weve got big goals were on a miss...,Web Developer,3167,"{\n ""Core Responsibilities"": ""Build and layo..."
5,DesignUps,designups is a nashville based design and inte...,Frontend Web Developer,892,"{\n ""Core Responsibilities"": ""Translate desi..."
6,"Equisolve, Inc.",about the position\n\nthe web designer is resp...,Remote Website Designer,3471,"{\n ""Core Responsibilities"": ""Provide design..."
7,Zander Insurance Agency,job description\n\nzander insurance group is o...,Web Designer,2896,"{\n ""Core Responsibilities"": ""Design compell..."


# 4. Matching Algorithm

In [None]:
# For every job, keep the `top_k` resumes with the highest cosine similarity.
# Rows are collected in a list and the frame is built once. The previous
# `result_df.loc[i+j] = ...` used a non-unique label (job 0 / resume 629 and
# job 1 / resume 628 both map to 629), so later matches silently overwrote
# earlier ones and some jobs lost rows.
top_k = 5
match_rows = []
for i, job_desc_emb in enumerate(job_desc_embeddings):
    similarities = cosine_similarity([job_desc_emb], resume_embeddings)[0]
    top_k_indices = np.argsort(similarities)[::-1][:top_k]
    for j in top_k_indices:
        match_rows.append({
            'jobId': i,
            'resumeId': resume_data['ID'].iloc[j],
            'similarity': similarities[j],
            'domainResume': resume_data['Category'].iloc[j],
            'domainDesc': job_description['position_title'].iloc[i],
        })

# The row label is now a plain running counter, unique per (job, resume) pair.
result_df = pd.DataFrame(match_rows, columns=['jobId', 'resumeId', 'similarity', 'domainResume', 'domainDesc'])
result_df = result_df.sort_values(by='similarity', ascending=False)
result_group = result_df.groupby("jobId")
print_top_matching_resumes(result_group)


Job ID: 0
Cosine Similarity | Domain Resume | Domain Description
      similarity            domainResume        domainDesc
629     0.938355    BUSINESS-DEVELOPMENT  Sales Specialist
299     0.930880  INFORMATION-TECHNOLOGY  Sales Specialist
577     0.930776    BUSINESS-DEVELOPMENT  Sales Specialist
1235    0.930246           DIGITAL-MEDIA  Sales Specialist
1045    0.929638                   SALES  Sales Specialist

Job ID: 1
Cosine Similarity | Domain Resume | Domain Description
      similarity   domainResume                  domainDesc
2311    0.908976           ARTS  Apple Solutions Consultant
1048    0.903436          SALES  Apple Solutions Consultant
2151    0.901145        BANKING  Apple Solutions Consultant
1300    0.899512  DIGITAL-MEDIA  Apple Solutions Consultant
1227    0.893605  DIGITAL-MEDIA  Apple Solutions Consultant

Job ID: 2
Cosine Similarity | Domain Resume | Domain Description
      similarity   domainResume                                 domainDesc
2289    0.956

# 5. Sample Test

In [None]:
pip install pypdf

Collecting pypdf
  Downloading pypdf-4.2.0-py3-none-any.whl (290 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m290.4/290.4 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: pypdf
Successfully installed pypdf-4.2.0


In [None]:
import pandas as pd
from pypdf import PdfReader
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import string
import re
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path`` as one string.

    Pages are joined with a newline. Joining with an empty string fused the
    last word of a page with the first word of the next one, which corrupted
    tokens at every page break. ``or ""`` keeps the join safe should a page
    yield a falsy (e.g. ``None``) extraction result.
    """
    reader = PdfReader(file_path)
    return "\n".join((page.extract_text() or "") for page in reader.pages)

def preprocess_text(text):
    """Normalize raw text into a space-separated keyword string.

    Steps: lowercase, replace every non-letter with a space, tokenize, drop
    NLTK English stop words, POS-tag the remainder and discard tokens tagged
    DT, IN, TO, PRP or WP.

    Returns:
        dict: ``{'feature': <cleaned text>}``.
    """
    dropped_tags = ['DT', 'IN', 'TO', 'PRP', 'WP']
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    english_stops = set(stopwords.words("english"))

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token in english_stops:
            continue
        kept_tokens.append(token)

    survivors = []
    for token, pos in pos_tag(kept_tokens):
        if pos not in dropped_tags:
            survivors.append(token)

    return {'feature': " ".join(survivors)}

def process_resume_data(df, data_dir="/content/drive/MyDrive/JM/data"):
    """Attach the cleaned resume text to one row of the resume table.

    Meant for ``DataFrame.apply(..., axis=1)``: ``df`` is a single row that
    provides ``ID`` and ``Category``. The PDF at
    ``{data_dir}/{Category}/{ID}.pdf`` is read, preprocessed and stored in the
    row under ``Feature``.

    Args:
        df: Row with ``ID`` and ``Category`` entries; it is modified in place.
        data_dir: Root folder holding one sub-folder of PDFs per category.
            Defaults to the previously hard-coded Drive location.

    Returns:
        The same row, now carrying a ``Feature`` entry.
    """
    resume_id = df['ID']  # renamed so the built-in ``id`` is not shadowed
    category = df['Category']
    pdf_path = f"{data_dir}/{category}/{resume_id}.pdf"
    df['Feature'] = preprocess_text(extract_text_from_pdf(pdf_path))['feature']
    return df

# Tokenizers already loaded, keyed by checkpoint name, so that embedding many
# texts does not reload the tokenizer from disk/hub on every single call.
_TOKENIZER_CACHE = {}


def get_embeddings(text, model_name):
    """Embed ``text`` as the mean of the model's last hidden states.

    Args:
        text: Input to embed; it is converted with ``str()`` first.
        model_name: Checkpoint name used to load the tokenizer. The encoder
            itself is the notebook-level ``model`` running on ``device``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over ``dim=1``, moved to
        the CPU. A single text is encoded per call, so there is one row.
    """
    cached_tokenizer = _TOKENIZER_CACHE.get(model_name)
    if cached_tokenizer is None:
        cached_tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = cached_tokenizer

    inputs = cached_tokenizer(str(text), return_tensors="pt", truncation=True, padding=True).to(device)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).to("cpu").numpy()

def print_top_matching_resumes(result_group, num_jobs=15):
    """Print the matched resumes for job ids ``0 .. num_jobs - 1``.

    Args:
        result_group: Result of grouping the match table by ``jobId``; each
            group must have ``similarity``, ``domainResume`` and
            ``domainDesc`` columns.
        num_jobs: How many leading job ids to report (default 15, the value
            that used to be hard-coded).

    Job ids without a group are skipped. ``get_group`` would otherwise raise
    ``KeyError`` and abort the report.
    """
    shown_columns = ['similarity', 'domainResume', 'domainDesc']
    for job_id in range(num_jobs):
        if job_id not in result_group.groups:
            continue
        print("\nJob ID:", job_id)
        print("Cosine Similarity | Domain Resume | Domain Description")
        print(result_group.get_group(job_id)[shown_columns])

In [None]:
# Reload the artifacts saved by the earlier sections instead of recomputing.
# NOTE(review): allow_pickle=True lets np.load unpickle objects stored in the
# file. Presumably the saved embeddings are a plain numeric array, in which
# case the flag is unnecessary - confirm, and never load untrusted files so.
job_desc_embeddings= np.load("/content/drive/MyDrive/JM/temp/job_embedding.npy", allow_pickle = True)
job_description = pd.read_csv('/content/drive/MyDrive/JM/job_data.csv')
resume_data = pd.read_csv('/content/drive/MyDrive/JM/resume_data.csv')


# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "bert-base-uncased"
# Tokenizer and encoder are loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Move the model to `device`; as the cell's last expression this also
# displays the model structure.
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [None]:
# Company names are read from the raw training_data.csv; the job_data.csv
# written earlier keeps only the description, title and feature columns.
company_name = pd.read_csv("/content/drive/MyDrive/JM/training_data.csv")['company_name']
# Display the first entry as a quick check.
company_name[0]

'Google'

In [None]:
sample = "html 3 years and hompage server web application development engineer design java script"
sample_embeddings = get_embeddings(sample, model_name)

# cosine_similarity returns one column per query vector; there is one query.
similarities = cosine_similarity(job_desc_embeddings, sample_embeddings)
# Slicing (instead of a fixed range(10)) also works with fewer than 10 jobs.
top_k_indices = np.argsort(similarities[:, 0])[::-1][:10]
# Build the frame once from records rather than growing it row by row, which
# also gives the columns proper numeric dtypes.
sample_df = pd.DataFrame(
    [
        {
            'jobId': int(idx),
            'company': company_name[idx],
            'similarity': float(similarities[idx][0] * 100),  # percent
            'domainDesc': job_description['position_title'].iloc[idx],
        }
        for idx in top_k_indices
    ],
    columns=['jobId', 'company', 'similarity', 'domainDesc'],
)

sample_df = sample_df.sort_values(by='similarity', ascending=False)
sample_df

Unnamed: 0,jobId,company,similarity,domainDesc
0,13,Themesoft Inc,82.12505,Wordpress Web Developer
1,62,Dreamory Entertainment Group Sdn Bhd,77.869904,Full Stack Web Developer
2,288,Blue Frog Solutions | Marketing Agency,77.398646,HTML and Wordpress Developer
3,67,Bright Nexus (M) Sdn Bhd,75.662899,Web Developer
4,95,Remote Team Inc.,73.565805,Software Developer
5,66,Grand-flo Spritvest Sdn Bhd,73.536313,Software Engineer (Web)
6,347,"VSV WINS, INC",73.202837,Remote Project Manager
7,416,"NITYA Software Solutions, Inc.",72.943342,Windows Packaging Engineer
8,289,Swifty Web Agency,71.931481,Wordpress Developer
9,68,MEGASAP,71.868646,Interns (Web Developers Mobile Developers Game...


In [None]:
 # Show the raw description text of the job posting at position 298.
 job_description.iloc[298]['job_description']

'legal fellowships the candidates should have diverse cultural backgrounds andor proficiency in languages other than english to contact us considering the time needed to develop projects acceptable to firm and the applicants interested persons should contact firm as soon as possible'

# 6. Web Deployment Using Gradio

In [None]:
!pip install gradio_client
!pip install -U gradio

Collecting gradio_client
  Downloading gradio_client-0.17.0-py3-none-any.whl (316 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m316.3/316.3 kB[0m [31m4.4 MB/s[0m eta [36m0:00:00[0m
Collecting httpx>=0.24.1 (from gradio_client)
  Downloading httpx-0.27.0-py3-none-any.whl (75 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m75.6/75.6 kB[0m [31m6.6 MB/s[0m eta [36m0:00:00[0m
Collecting websockets<12.0,>=10.0 (from gradio_client)
  Downloading websockets-11.0.3-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (129 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m129.9/129.9 kB[0m [31m5.3 MB/s[0m eta [36m0:00:00[0m
Collecting httpcore==1.* (from httpx>=0.24.1->gradio_client)
  Downloading httpcore-1.0.5-py3-none-any.whl (77 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m77.9/77.9 kB[0m [31m6.3 MB/s[0m eta [36m0:00:00[0m
Collecting h11<0.15,>=0

In [None]:
pip install pypdf



In [None]:
import pandas as pd
from pypdf import PdfReader
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import string
import re
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.


True

In [None]:
import torch
import requests
import gradio as gr

import pandas as pd
import pandas as pd
from pypdf import PdfReader
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import string
import re
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

# Reload the artifacts saved by the earlier sections instead of recomputing.
# NOTE(review): allow_pickle=True lets np.load unpickle objects stored in the
# file. Presumably the saved embeddings are a plain numeric array, in which
# case the flag is unnecessary - confirm, and never load untrusted files so.
job_desc_embeddings= np.load("/content/drive/MyDrive/JM/temp/job_embedding.npy", allow_pickle = True)
job_description = pd.read_csv('/content/drive/MyDrive/JM/job_data.csv')
resume_data = pd.read_csv('/content/drive/MyDrive/JM/resume_data.csv')
# Company names only exist in the raw training_data.csv.
company_name = pd.read_csv("/content/drive/MyDrive/JM/training_data.csv")['company_name']

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "bert-base-uncased"
# Tokenizer and encoder are loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Move the model to `device`.
model.to(device)

def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path`` as one string.

    Pages are joined with a newline. Joining with an empty string fused the
    last word of a page with the first word of the next one, which corrupted
    tokens at every page break. ``or ""`` keeps the join safe should a page
    yield a falsy (e.g. ``None``) extraction result.
    """
    reader = PdfReader(file_path)
    return "\n".join((page.extract_text() or "") for page in reader.pages)

def preprocess_text(text):
    """Normalize raw text into a space-separated keyword string.

    Steps: lowercase, replace every non-letter with a space, tokenize, drop
    NLTK English stop words, POS-tag the remainder and discard tokens tagged
    DT, IN, TO, PRP or WP.

    Returns:
        dict: ``{'feature': <cleaned text>}``.
    """
    dropped_tags = ['DT', 'IN', 'TO', 'PRP', 'WP']
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    english_stops = set(stopwords.words("english"))

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token in english_stops:
            continue
        kept_tokens.append(token)

    survivors = []
    for token, pos in pos_tag(kept_tokens):
        if pos not in dropped_tags:
            survivors.append(token)

    return {'feature': " ".join(survivors)}

def process_resume_data(df, data_dir="/content/drive/MyDrive/JM/data"):
    """Attach the cleaned resume text to one row of the resume table.

    Meant for ``DataFrame.apply(..., axis=1)``: ``df`` is a single row that
    provides ``ID`` and ``Category``. The PDF at
    ``{data_dir}/{Category}/{ID}.pdf`` is read, preprocessed and stored in the
    row under ``Feature``.

    Args:
        df: Row with ``ID`` and ``Category`` entries; it is modified in place.
        data_dir: Root folder holding one sub-folder of PDFs per category.
            Defaults to the previously hard-coded Drive location.

    Returns:
        The same row, now carrying a ``Feature`` entry.
    """
    resume_id = df['ID']  # renamed so the built-in ``id`` is not shadowed
    category = df['Category']
    pdf_path = f"{data_dir}/{category}/{resume_id}.pdf"
    df['Feature'] = preprocess_text(extract_text_from_pdf(pdf_path))['feature']
    return df

# Tokenizers already loaded, keyed by checkpoint name, so that embedding many
# texts does not reload the tokenizer from disk/hub on every single call.
_TOKENIZER_CACHE = {}


def get_embeddings(text, model_name):
    """Embed ``text`` as the mean of the model's last hidden states.

    Args:
        text: Input to embed; it is converted with ``str()`` first.
        model_name: Checkpoint name used to load the tokenizer. The encoder
            itself is the notebook-level ``model`` running on ``device``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over ``dim=1``, moved to
        the CPU. A single text is encoded per call, so there is one row.
    """
    cached_tokenizer = _TOKENIZER_CACHE.get(model_name)
    if cached_tokenizer is None:
        cached_tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = cached_tokenizer

    inputs = cached_tokenizer(str(text), return_tensors="pt", truncation=True, padding=True).to(device)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).to("cpu").numpy()

def print_top_matching_resumes(result_group, num_jobs=15):
    """Print the matched resumes for job ids ``0 .. num_jobs - 1``.

    Args:
        result_group: Result of grouping the match table by ``jobId``; each
            group must have ``similarity``, ``domainResume`` and
            ``domainDesc`` columns.
        num_jobs: How many leading job ids to report (default 15, the value
            that used to be hard-coded).

    Job ids without a group are skipped. ``get_group`` would otherwise raise
    ``KeyError`` and abort the report.
    """
    shown_columns = ['similarity', 'domainResume', 'domainDesc']
    for job_id in range(num_jobs):
        if job_id not in result_group.groups:
            continue
        print("\nJob ID:", job_id)
        print("Cosine Similarity | Domain Resume | Domain Description")
        print(result_group.get_group(job_id)[shown_columns])


def infer(sample):
    """Return the ten job postings most similar to the free-text ``sample``.

    The text is embedded with ``get_embeddings`` and compared against the
    preloaded ``job_desc_embeddings`` by cosine similarity.

    Returns:
        pandas.DataFrame with columns ``jobId``, ``company``, ``similarity``
        (in percent) and ``domainDesc``, best match first. If anything fails,
        the same fallback message string as before is returned.
    """
    top_k = 10
    try:
        # NOTE(review): the job embeddings were built from preprocess_text()
        # output, while `sample` is embedded raw - confirm this is intended.
        sample_embeddings = get_embeddings(sample, model_name)
        # cosine_similarity returns one column per query; there is one query.
        scores = cosine_similarity(job_desc_embeddings, sample_embeddings)[:, 0]
        # Slicing (not a fixed range(10)) also works with fewer than 10 jobs.
        best = np.argsort(scores)[::-1][:top_k]
        # Build the frame once from records rather than row by row.
        rows = [
            {
                'jobId': int(idx),
                'company': company_name[idx],
                'similarity': float(scores[idx] * 100),
                'domainDesc': job_description['position_title'].iloc[idx],
            }
            for idx in best
        ]
        result = pd.DataFrame(rows, columns=['jobId', 'company', 'similarity', 'domainDesc'])
        return result.sort_values(by='similarity', ascending=False)
    except Exception as exc:
        # Catch Exception only (a bare except also traps KeyboardInterrupt)
        # and log the cause so failures can be diagnosed.
        print(f"infer failed: {exc!r}")
        return "뭔가 잘못됬어요.."



demo = gr.Interface(
    fn=infer,
    # The hint is a placeholder, not the initial value: as `value` it was
    # pre-filled into the box and submitted as the query unless the user
    # deleted it first.
    inputs=[gr.Textbox(placeholder="이력서를 작성하세요", label="내용")],
    outputs=[gr.Dataframe(label="매칭결과")],
)

demo.launch()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://6384975b3f6d013888.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




# 6.5. 시연용 임시 EC2 배포 (Temporary EC2 Deployment for the Demo)

In [None]:
import torch
import pandas as pd
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import re
from tqdm import tqdm
from transformers import AutoModel, AutoTokenizer
import torch
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import nltk

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

# Reload the artifacts saved by the earlier sections instead of recomputing.
# NOTE(review): allow_pickle=True lets np.load unpickle objects stored in the
# file. Presumably the saved embeddings are a plain numeric array, in which
# case the flag is unnecessary - confirm, and never load untrusted files so.
job_desc_embeddings= np.load("/content/drive/MyDrive/JM/temp/job_embedding.npy", allow_pickle = True)
job_description = pd.read_csv('/content/drive/MyDrive/JM/job_data.csv')
resume_data = pd.read_csv('/content/drive/MyDrive/JM/resume_data.csv')
# Company names only exist in the raw training_data.csv.
company_name = pd.read_csv("/content/drive/MyDrive/JM/training_data.csv")['company_name']

# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "bert-base-uncased"
# Tokenizer and encoder are loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Move the model to `device`.
model.to(device)

def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path`` as one string.

    Pages are joined with a newline. Joining with an empty string fused the
    last word of a page with the first word of the next one, which corrupted
    tokens at every page break. ``or ""`` keeps the join safe should a page
    yield a falsy (e.g. ``None``) extraction result.
    """
    reader = PdfReader(file_path)
    return "\n".join((page.extract_text() or "") for page in reader.pages)

def preprocess_text(text):
    """Normalize raw text into a space-separated keyword string.

    Steps: lowercase, replace every non-letter with a space, tokenize, drop
    NLTK English stop words, POS-tag the remainder and discard tokens tagged
    DT, IN, TO, PRP or WP.

    Returns:
        dict: ``{'feature': <cleaned text>}``.
    """
    dropped_tags = ['DT', 'IN', 'TO', 'PRP', 'WP']
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    english_stops = set(stopwords.words("english"))

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token in english_stops:
            continue
        kept_tokens.append(token)

    survivors = []
    for token, pos in pos_tag(kept_tokens):
        if pos not in dropped_tags:
            survivors.append(token)

    return {'feature': " ".join(survivors)}

def process_resume_data(df, data_dir="/content/drive/MyDrive/JM/data"):
    """Attach the cleaned resume text to one row of the resume table.

    Meant for ``DataFrame.apply(..., axis=1)``: ``df`` is a single row that
    provides ``ID`` and ``Category``. The PDF at
    ``{data_dir}/{Category}/{ID}.pdf`` is read, preprocessed and stored in the
    row under ``Feature``.

    Args:
        df: Row with ``ID`` and ``Category`` entries; it is modified in place.
        data_dir: Root folder holding one sub-folder of PDFs per category.
            Defaults to the previously hard-coded Drive location.

    Returns:
        The same row, now carrying a ``Feature`` entry.
    """
    resume_id = df['ID']  # renamed so the built-in ``id`` is not shadowed
    category = df['Category']
    pdf_path = f"{data_dir}/{category}/{resume_id}.pdf"
    df['Feature'] = preprocess_text(extract_text_from_pdf(pdf_path))['feature']
    return df

# Tokenizers already loaded, keyed by checkpoint name, so that embedding many
# texts does not reload the tokenizer from disk/hub on every single call.
_TOKENIZER_CACHE = {}


def get_embeddings(text, model_name):
    """Embed ``text`` as the mean of the model's last hidden states.

    Args:
        text: Input to embed; it is converted with ``str()`` first.
        model_name: Checkpoint name used to load the tokenizer. The encoder
            itself is the notebook-level ``model`` running on ``device``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over ``dim=1``, moved to
        the CPU. A single text is encoded per call, so there is one row.
    """
    cached_tokenizer = _TOKENIZER_CACHE.get(model_name)
    if cached_tokenizer is None:
        cached_tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = cached_tokenizer

    inputs = cached_tokenizer(str(text), return_tensors="pt", truncation=True, padding=True).to(device)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).to("cpu").numpy()

def print_top_matching_resumes(result_group, num_jobs=15):
    """Print the matched resumes for job ids ``0 .. num_jobs - 1``.

    Args:
        result_group: Result of grouping the match table by ``jobId``; each
            group must have ``similarity``, ``domainResume`` and
            ``domainDesc`` columns.
        num_jobs: How many leading job ids to report (default 15, the value
            that used to be hard-coded).

    Job ids without a group are skipped. ``get_group`` would otherwise raise
    ``KeyError`` and abort the report.
    """
    shown_columns = ['similarity', 'domainResume', 'domainDesc']
    for job_id in range(num_jobs):
        if job_id not in result_group.groups:
            continue
        print("\nJob ID:", job_id)
        print("Cosine Similarity | Domain Resume | Domain Description")
        print(result_group.get_group(job_id)[shown_columns])


def infer(sample):
    """Return the ten job postings most similar to the free-text ``sample``.

    The text is embedded with ``get_embeddings`` and compared against the
    preloaded ``job_desc_embeddings`` by cosine similarity.

    Returns:
        pandas.DataFrame with columns ``jobId``, ``company``, ``similarity``
        (in percent) and ``domainDesc``, best match first. If anything fails,
        the same fallback message string as before is returned.
    """
    top_k = 10
    try:
        # NOTE(review): the job embeddings were built from preprocess_text()
        # output, while `sample` is embedded raw - confirm this is intended.
        sample_embeddings = get_embeddings(sample, model_name)
        # cosine_similarity returns one column per query; there is one query.
        scores = cosine_similarity(job_desc_embeddings, sample_embeddings)[:, 0]
        # Slicing (not a fixed range(10)) also works with fewer than 10 jobs.
        best = np.argsort(scores)[::-1][:top_k]
        # Build the frame once from records rather than row by row.
        rows = [
            {
                'jobId': int(idx),
                'company': company_name[idx],
                'similarity': float(scores[idx] * 100),
                'domainDesc': job_description['position_title'].iloc[idx],
            }
            for idx in best
        ]
        result = pd.DataFrame(rows, columns=['jobId', 'company', 'similarity', 'domainDesc'])
        return result.sort_values(by='similarity', ascending=False)
    except Exception as exc:
        # Catch Exception only (a bare except also traps KeyboardInterrupt)
        # and log the cause so failures can be diagnosed.
        print(f"infer failed: {exc!r}")
        return "뭔가 잘못됬어요.."



demo = gr.Interface(
    fn=infer,
    # The hint is a placeholder, not the initial value: as `value` it was
    # pre-filled into the box and submitted as the query unless the user
    # deleted it first.
    inputs=[gr.Textbox(placeholder="이력서를 작성하세요", label="내용")],
    outputs=[gr.Dataframe(label="매칭결과")],
)

demo.launch()

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!


Setting queue=True in a Colab notebook requires sharing enabled. Setting `share=True` (you can turn this off by setting `share=False` in `launch()` explicitly).

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
Running on public URL: https://6384975b3f6d013888.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from Terminal to deploy to Spaces (https://huggingface.co/spaces)




In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.6


In [None]:
import torch
import pandas as pd
import nltk
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import re
from transformers import AutoModel, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

In [None]:
# Reload the artifacts saved by the earlier sections instead of recomputing.
# NOTE(review): allow_pickle=True lets np.load unpickle objects stored in the
# file. Presumably the saved embeddings are a plain numeric array, in which
# case the flag is unnecessary - confirm, and never load untrusted files so.
job_desc_embeddings= np.load("/content/drive/MyDrive/JM/temp/job_embedding.npy", allow_pickle = True)
job_description = pd.read_csv('/content/drive/MyDrive/JM/job_data.csv')
resume_data = pd.read_csv('/content/drive/MyDrive/JM/resume_data.csv')
# Company names only exist in the raw training_data.csv.
company_name = pd.read_csv("/content/drive/MyDrive/JM/training_data.csv")['company_name']

In [None]:
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

In [None]:
# Use the GPU when torch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "bert-base-uncased"
# Tokenizer and encoder are loaded from the same checkpoint name.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
# Move the model to `device`; as the cell's last expression this also
# displays the model structure.
model.to(device)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

BertModel(
  (embeddings): BertEmbeddings(
    (word_embeddings): Embedding(30522, 768, padding_idx=0)
    (position_embeddings): Embedding(512, 768)
    (token_type_embeddings): Embedding(2, 768)
    (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (encoder): BertEncoder(
    (layer): ModuleList(
      (0-11): 12 x BertLayer(
        (attention): BertAttention(
          (self): BertSdpaSelfAttention(
            (query): Linear(in_features=768, out_features=768, bias=True)
            (key): Linear(in_features=768, out_features=768, bias=True)
            (value): Linear(in_features=768, out_features=768, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
          )
          (output): BertSelfOutput(
            (dense): Linear(in_features=768, out_features=768, bias=True)
            (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
            (dropout): Dropout(p=0.1, inplace=False

In [None]:
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path`` as one string.

    Pages are joined with a newline. Joining with an empty string fused the
    last word of a page with the first word of the next one, which corrupted
    tokens at every page break. ``or ""`` keeps the join safe should a page
    yield a falsy (e.g. ``None``) extraction result.
    """
    reader = PdfReader(file_path)
    return "\n".join((page.extract_text() or "") for page in reader.pages)

def preprocess_text(text):
    """Normalize raw text into a space-separated keyword string.

    Steps: lowercase, replace every non-letter with a space, tokenize, drop
    NLTK English stop words, POS-tag the remainder and discard tokens tagged
    DT, IN, TO, PRP or WP.

    Returns:
        dict: ``{'feature': <cleaned text>}``.
    """
    dropped_tags = ['DT', 'IN', 'TO', 'PRP', 'WP']
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    english_stops = set(stopwords.words("english"))

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token in english_stops:
            continue
        kept_tokens.append(token)

    survivors = []
    for token, pos in pos_tag(kept_tokens):
        if pos not in dropped_tags:
            survivors.append(token)

    return {'feature': " ".join(survivors)}

def process_resume_data(df, data_dir="/content/drive/MyDrive/JM/data"):
    """Attach the cleaned resume text to one row of the resume table.

    Meant for ``DataFrame.apply(..., axis=1)``: ``df`` is a single row that
    provides ``ID`` and ``Category``. The PDF at
    ``{data_dir}/{Category}/{ID}.pdf`` is read, preprocessed and stored in the
    row under ``Feature``.

    Args:
        df: Row with ``ID`` and ``Category`` entries; it is modified in place.
        data_dir: Root folder holding one sub-folder of PDFs per category.
            Defaults to the previously hard-coded Drive location.

    Returns:
        The same row, now carrying a ``Feature`` entry.
    """
    resume_id = df['ID']  # renamed so the built-in ``id`` is not shadowed
    category = df['Category']
    pdf_path = f"{data_dir}/{category}/{resume_id}.pdf"
    df['Feature'] = preprocess_text(extract_text_from_pdf(pdf_path))['feature']
    return df

# Tokenizers already loaded, keyed by checkpoint name, so that embedding many
# texts does not reload the tokenizer from disk/hub on every single call.
_TOKENIZER_CACHE = {}


def get_embeddings(text, model_name):
    """Embed ``text`` as the mean of the model's last hidden states.

    Args:
        text: Input to embed; it is converted with ``str()`` first.
        model_name: Checkpoint name used to load the tokenizer. The encoder
            itself is the notebook-level ``model`` running on ``device``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over ``dim=1``, moved to
        the CPU. A single text is encoded per call, so there is one row.
    """
    cached_tokenizer = _TOKENIZER_CACHE.get(model_name)
    if cached_tokenizer is None:
        cached_tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = cached_tokenizer

    inputs = cached_tokenizer(str(text), return_tensors="pt", truncation=True, padding=True).to(device)
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).to("cpu").numpy()

def print_top_matching_resumes(result_group, num_jobs=15):
    """Print the similarity table of each job group.

    Args:
        result_group: Object exposing ``get_group(job_id)`` whose result can be
            indexed with a list of column names (presumably a pandas GroupBy
            keyed by job id; confirm with caller).
        num_jobs: Number of job ids, starting at 0, to print. Defaults to 15,
            the count this function was originally fixed to.
    """
    columns = ['similarity', 'domainResume', 'domainDesc']
    for job_id in range(num_jobs):
        print("\nJob ID:", job_id)
        print("Cosine Similarity | Domain Resume | Domain Description")
        print(result_group.get_group(job_id)[columns])


In [None]:
# Smoke test of the matcher: embed a sample query and rank the job
# descriptions against it.
top_k = 10
sample_embeddings = get_embeddings("hi", model_name)

# One similarity per job: column 0 is the single query that was embedded.
similarities = cosine_similarity(job_desc_embeddings, sample_embeddings)[:, 0]
top_k_indices = np.argsort(similarities)[::-1][:top_k]

# Build all rows at once and iterate over the indices actually available, so
# fewer than `top_k` jobs does not raise an IndexError.
sample_df = pd.DataFrame(
    [
        {
            'jobId': int(idx),
            'company': company_name[idx],
            'similarity': float(similarities[idx] * 100),
            'domainDesc': job_description['position_title'].iloc[idx],
        }
        for idx in top_k_indices
    ],
    columns=['jobId', 'company', 'similarity', 'domainDesc'],
)
result = sample_df.sort_values(by='similarity', ascending=False)

result

Unnamed: 0,jobId,company,similarity,domainDesc
0,281,Test Company,81.717455,Test position
1,607,Testing,69.222867,test
2,393,choice recovery example,60.143238,coach
3,80,Grasstik,49.022719,Commission Sales Associate
4,287,Choice Recovery,41.610759,Marketing Director
5,824,The Grove,38.867855,Sales Associate
6,101,AD ZipRecruiter,36.098239,"Bank President Jobs in Columbus, Ohio"
7,757,"Meijer, Inc.",35.788521,Grocery Inventory clerk Part Time
8,68,MEGASAP,35.713771,Interns (Web Developers Mobile Developers Game...
9,293,Explore Job Search,34.470782,Data Entry Clerk


In [None]:
!pip install pyngrok

Collecting pyngrok
  Downloading pyngrok-7.1.6-py3-none-any.whl (22 kB)
Installing collected packages: pyngrok
Successfully installed pyngrok-7.1.6


In [None]:
from flask import Flask, request, jsonify
from pyngrok import ngrok
import torch
import pandas as pd
import nltk
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import re
from transformers import AutoModel, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

import os

# The ngrok auth token is read from the environment rather than stored in the
# notebook.
# NOTE(review): a token was previously committed in this cell; treat it as
# compromised and revoke it in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_token:
    raise RuntimeError("Set the NGROK_AUTHTOKEN environment variable before running this cell.")
ngrok.set_auth_token(ngrok_token)
app = Flask(__name__)

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

# NOTE(review): allow_pickle=True lets np.load run arbitrary pickled code.
# Only load files you produced yourself; drop the flag if the array is plain numeric.
job_desc_embeddings = np.load("/content/drive/MyDrive/JM/temp/job_embedding.npy", allow_pickle=True)
job_description = pd.read_csv('/content/drive/MyDrive/JM/job_data.csv')
resume_data = pd.read_csv('/content/drive/MyDrive/JM/resume_data.csv')
company_name = pd.read_csv("/content/drive/MyDrive/JM/training_data.csv")['company_name']

# Load the encoder once at startup and move it to the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.to(device)

##################################
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path`` as one string.

    Pages are joined with a newline so that the last word of one page does not
    fuse with the first word of the next. A page whose ``extract_text()``
    yields nothing contributes an empty string instead of breaking the join.
    """
    reader = PdfReader(file_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def preprocess_text(text):
    """Reduce raw text to a space-separated string of content words.

    The text is lower-cased, every non-letter character is replaced by a
    space, English stop words are removed, and tokens whose ``pos_tag`` label
    is one of DT, IN, TO, PRP or WP are dropped.

    Returns:
        dict: ``{'feature': <cleaned text>}``.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    english_stops = set(stopwords.words("english"))
    excluded_tags = ['DT', 'IN', 'TO', 'PRP', 'WP']

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token not in english_stops:
            kept_tokens.append(token)

    content_words = []
    for token, tag in pos_tag(kept_tokens):
        if tag in excluded_tags:
            continue
        content_words.append(token)

    return {'feature': " ".join(content_words)}

def process_resume_data(df, data_dir="/content/drive/MyDrive/JM/data"):
    """Attach the preprocessed resume text to one resume record.

    Args:
        df: A record supporting ``df['ID']`` / ``df['Category']`` lookups and
            item assignment (presumably one DataFrame row; confirm with caller).
        data_dir: Root folder holding ``<Category>/<ID>.pdf`` files. Defaults
            to the Drive location this notebook was written against.

    Returns:
        The same ``df`` object, with its 'Feature' entry set to the cleaned text.
    """
    resume_id = df['ID']  # not named `id`, which would shadow the builtin
    category = df['Category']
    text = extract_text_from_pdf(f"{data_dir}/{category}/{resume_id}.pdf")
    df['Feature'] = preprocess_text(text)['feature']
    return df

# Tokenizers already loaded, keyed by model name, so each request does not
# reload them.
_TOKENIZER_CACHE = {}


def get_embeddings(text, model_name):
    """Embed ``text`` as the mean of the model's last hidden states.

    Uses the module-level ``model`` and ``device``; ``model_name`` only selects
    the tokenizer.

    Args:
        text: Input to embed; it is converted with ``str()`` and truncated by
            the tokenizer if it is too long.
        model_name: Name passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over dimension 1, on CPU.
    """
    tokenizer = _TOKENIZER_CACHE.get(model_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = tokenizer

    inputs = tokenizer(str(text), return_tensors="pt", truncation=True, padding=True).to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).detach().to("cpu").numpy()

def print_top_matching_resumes(result_group, num_jobs=15):
    """Print the similarity table of each job group.

    Args:
        result_group: Object exposing ``get_group(job_id)`` whose result can be
            indexed with a list of column names (presumably a pandas GroupBy
            keyed by job id; confirm with caller).
        num_jobs: Number of job ids, starting at 0, to print. Defaults to 15,
            the count this function was originally fixed to.
    """
    columns = ['similarity', 'domainResume', 'domainDesc']
    for job_id in range(num_jobs):
        print("\nJob ID:", job_id)
        print("Cosine Similarity | Domain Resume | Domain Description")
        print(result_group.get_group(job_id)[columns])
##################################################

@app.route('/ai_test', methods=['GET'])
def ai_test():
    """Rank job descriptions against the ``input_text`` query parameter.

    Returns:
        A JSON array of up to 10 records with the keys ``jobId``, ``company``,
        ``similarity`` (cosine similarity times 100) and ``domainDesc``, sorted
        by descending similarity. Responds with HTTP 400 when ``input_text``
        is missing or empty.
    """
    input_text = request.args.get('input_text')
    if not input_text:
        # Without this guard a missing parameter would be embedded as the string "None".
        return jsonify({'error': "missing required query parameter 'input_text'"}), 400

    sample_embeddings = get_embeddings(input_text, model_name)
    # One similarity per job: column 0 is the single query that was embedded.
    similarities = cosine_similarity(job_desc_embeddings, sample_embeddings)[:, 0]
    top_k_indices = np.argsort(similarities)[::-1][:10]

    # Iterate over the indices actually available, so fewer than 10 jobs does
    # not raise an IndexError.
    rows = [
        {
            'jobId': int(idx),
            'company': company_name[idx],
            'similarity': float(similarities[idx] * 100),
            'domainDesc': job_description['position_title'].iloc[idx],
        }
        for idx in top_k_indices
    ]
    sample_df = pd.DataFrame(rows, columns=['jobId', 'company', 'similarity', 'domainDesc'])
    result = sample_df.sort_values(by='similarity', ascending=False)

    # A bare string would be sent as text/html; label the payload as JSON.
    return app.response_class(result.to_json(orient='records'), mimetype='application/json')

@app.route('/get_desc', methods=['GET'])
def get_desc():
    """Return the ``job_description`` text of the job at row position ``id``.

    Responds with HTTP 400 when the ``id`` query parameter is missing or not
    an integer, and with HTTP 404 when it is outside the rows of
    ``job_description``.
    """
    # type=int makes Flask return None instead of raising on a missing or
    # non-numeric value.
    job_id = request.args.get('id', type=int)
    if job_id is None:
        return jsonify({'error': "query parameter 'id' must be an integer"}), 400
    # Reject negative ids explicitly; iloc would silently count them from the end.
    if not 0 <= job_id < len(job_description):
        return jsonify({'error': f"no job with id {job_id}"}), 404

    return job_description.iloc[job_id]['job_description']


if __name__ == "__main__":
    port = 5000
    # Open the public tunnel first so its URL is printed before app.run() blocks.
    public_url = ngrok.connect(port)
    print(f"Public URL: {public_url}")
    try:
        app.run(port=port)
    finally:
        # Close the tunnel when the server stops so re-running the cell does
        # not accumulate open tunnels.
        ngrok.disconnect(public_url.public_url)



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.
[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     /root/nltk_data...
[nltk_data]   Unzipping taggers/averaged_perceptron_tagger.zip.
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


tokenizer_config.json:   0%|          | 0.00/48.0 [00:00<?, ?B/s]



config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

Public URL: NgrokTunnel: "https://5188-34-31-6-137.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:02:50] "GET /get_desc?id=23 HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:02:53] "GET /ai_test?input_text=hi HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:03:14] "GET /ai_test?input_text=asd HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:09:36] "GET /get_desc?id=23 HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:09:53] "GET /ai_test?input_text=7%20years%20experience%20lawyer HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:09:55] "GET /ai_test?input_text=ㅇㅇ HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:29:46] "GET /ai_test?input_text=Rf HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:31:19] "GET /ai_test?input_text=hi HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:49:26] "GET /ai_test?input_text=ㅇㅇ HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [27/Jun/2024 05:49:27

In [None]:
from flask import Flask, request, jsonify
from pyngrok import ngrok
import torch
import pandas as pd
import nltk
from nltk import pos_tag, sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import re
from transformers import AutoModel, AutoTokenizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

import os

# The ngrok auth token is read from the environment rather than stored in the
# notebook.
# NOTE(review): a token was previously committed in this cell; treat it as
# compromised and revoke it in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_token:
    raise RuntimeError("Set the NGROK_AUTHTOKEN environment variable before running this cell.")
ngrok.set_auth_token(ngrok_token)
app = Flask(__name__)

nltk.download('punkt')
nltk.download('stopwords')
nltk.download('averaged_perceptron_tagger')

# NOTE(review): allow_pickle=True lets np.load run arbitrary pickled code.
# Only load files you produced yourself; drop the flag if the array is plain numeric.
job_desc_embeddings = np.load("/home/ubuntu/ai_test/job_embedding.npy", allow_pickle=True)
job_description = pd.read_csv('/home/ubuntu/ai_test/job_data.csv')
resume_data = pd.read_csv('/home/ubuntu/ai_test/resume_data.csv')
company_name = pd.read_csv("/home/ubuntu/ai_test/training_data.csv")['company_name']

# Load the encoder once at startup and move it to the GPU when one is available.
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name = "bert-base-uncased"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name)
model.to(device)

##################################
def extract_text_from_pdf(file_path):
    """Return the text of every page of the PDF at ``file_path`` as one string.

    Pages are joined with a newline so that the last word of one page does not
    fuse with the first word of the next. A page whose ``extract_text()``
    yields nothing contributes an empty string instead of breaking the join.
    """
    reader = PdfReader(file_path)
    return "\n".join(page.extract_text() or "" for page in reader.pages)

def preprocess_text(text):
    """Reduce raw text to a space-separated string of content words.

    The text is lower-cased, every non-letter character is replaced by a
    space, English stop words are removed, and tokens whose ``pos_tag`` label
    is one of DT, IN, TO, PRP or WP are dropped.

    Returns:
        dict: ``{'feature': <cleaned text>}``.
    """
    letters_only = re.sub('[^a-zA-Z]', ' ', text.lower())
    english_stops = set(stopwords.words("english"))
    excluded_tags = ['DT', 'IN', 'TO', 'PRP', 'WP']

    kept_tokens = []
    for token in word_tokenize(letters_only):
        if token not in english_stops:
            kept_tokens.append(token)

    content_words = []
    for token, tag in pos_tag(kept_tokens):
        if tag in excluded_tags:
            continue
        content_words.append(token)

    return {'feature': " ".join(content_words)}

def process_resume_data(df, data_dir="/content/drive/MyDrive/JM/data"):
    """Attach the preprocessed resume text to one resume record.

    Args:
        df: A record supporting ``df['ID']`` / ``df['Category']`` lookups and
            item assignment (presumably one DataFrame row; confirm with caller).
        data_dir: Root folder holding ``<Category>/<ID>.pdf`` files. Defaults
            to the Drive location this function was originally written
            against; pass the local data folder when running off Colab.

    Returns:
        The same ``df`` object, with its 'Feature' entry set to the cleaned text.
    """
    resume_id = df['ID']  # not named `id`, which would shadow the builtin
    category = df['Category']
    text = extract_text_from_pdf(f"{data_dir}/{category}/{resume_id}.pdf")
    df['Feature'] = preprocess_text(text)['feature']
    return df

# Tokenizers already loaded, keyed by model name, so each request does not
# reload them.
_TOKENIZER_CACHE = {}


def get_embeddings(text, model_name):
    """Embed ``text`` as the mean of the model's last hidden states.

    Uses the module-level ``model`` and ``device``; ``model_name`` only selects
    the tokenizer.

    Args:
        text: Input to embed; it is converted with ``str()`` and truncated by
            the tokenizer if it is too long.
        model_name: Name passed to ``AutoTokenizer.from_pretrained``.

    Returns:
        numpy.ndarray: ``last_hidden_state`` averaged over dimension 1, on CPU.
    """
    tokenizer = _TOKENIZER_CACHE.get(model_name)
    if tokenizer is None:
        tokenizer = AutoTokenizer.from_pretrained(model_name)
        _TOKENIZER_CACHE[model_name] = tokenizer

    inputs = tokenizer(str(text), return_tensors="pt", truncation=True, padding=True).to(device)
    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        outputs = model(**inputs)
    return outputs.last_hidden_state.mean(dim=1).detach().to("cpu").numpy()

def print_top_matching_resumes(result_group, num_jobs=15):
    """Print the similarity table of each job group.

    Args:
        result_group: Object exposing ``get_group(job_id)`` whose result can be
            indexed with a list of column names (presumably a pandas GroupBy
            keyed by job id; confirm with caller).
        num_jobs: Number of job ids, starting at 0, to print. Defaults to 15,
            the count this function was originally fixed to.
    """
    columns = ['similarity', 'domainResume', 'domainDesc']
    for job_id in range(num_jobs):
        print("\nJob ID:", job_id)
        print("Cosine Similarity | Domain Resume | Domain Description")
        print(result_group.get_group(job_id)[columns])
##################################################

@app.route('/ai_test', methods=['GET'])
def ai_test():
    """Rank job descriptions against the ``input_text`` query parameter.

    Returns:
        A JSON object keyed by row position, each value holding ``jobId``,
        ``company``, ``similarity`` (cosine similarity times 100) and
        ``domainDesc`` for one of up to 10 best matches. Responds with
        HTTP 400 when ``input_text`` is missing or empty.
    """
    input_text = request.args.get('input_text')
    if not input_text:
        # Without this guard a missing parameter would be embedded as the string "None".
        return jsonify({'error': "missing required query parameter 'input_text'"}), 400

    sample_embeddings = get_embeddings(input_text, model_name)
    # One similarity per job: column 0 is the single query that was embedded.
    similarities = cosine_similarity(job_desc_embeddings, sample_embeddings)[:, 0]
    top_k_indices = np.argsort(similarities)[::-1][:10]

    # Iterate over the indices actually available, so fewer than 10 jobs does
    # not raise an IndexError; cast numpy scalars to plain Python numbers
    # before they reach jsonify.
    rows = [
        {
            'jobId': int(idx),
            'company': company_name[idx],
            'similarity': float(similarities[idx] * 100),
            'domainDesc': job_description['position_title'].iloc[idx],
        }
        for idx in top_k_indices
    ]
    sample_df = pd.DataFrame(rows, columns=['jobId', 'company', 'similarity', 'domainDesc'])
    result = sample_df.sort_values(by='similarity', ascending=False)
    return jsonify(result.to_dict(orient='index'))


if __name__ == "__main__":
    port = 5000
    # Open the public tunnel first so its URL is printed before app.run() blocks.
    public_url = ngrok.connect(port)
    print(f"Public URL: {public_url}")
    try:
        app.run(port=port)
    finally:
        # Close the tunnel when the server stops so re-running the cell does
        # not accumulate open tunnels.
        ngrok.disconnect(public_url.public_url)

# 7. REST API

In [None]:
pip install pymysql



In [None]:
!pip3 install flask --ignore-installed embedchain

Collecting flask
  Using cached flask-3.0.3-py3-none-any.whl (101 kB)
Collecting embedchain
  Downloading embedchain-0.1.105-py3-none-any.whl (194 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m194.8/194.8 kB[0m [31m4.0 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting Werkzeug>=3.0.0 (from flask)
  Using cached werkzeug-3.0.3-py3-none-any.whl (227 kB)
Collecting Jinja2>=3.1.2 (from flask)
  Downloading jinja2-3.1.4-py3-none-any.whl (133 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m133.3/133.3 kB[0m [31m9.3 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting itsdangerous>=2.1.2 (from flask)
  Downloading itsdangerous-2.2.0-py3-none-any.whl (16 kB)
Collecting click>=8.1.3 (from flask)
  Downloading click-8.1.7-py3-none-any.whl (97 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m97.9/97.9 kB[0m [31m11.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting blinker>=1.6.2 (from flask)
  Using cached blinker-1.8.2-py3-none-any.whl (9.5

In [None]:
# app.py
from flask import Flask, request, jsonify
import pymysql

import os

app = Flask(__name__)

# MySQL connection settings.
# The password is read from the environment so that it is not stored in the
# file; the other settings can be overridden the same way and otherwise keep
# their previous values.
# NOTE(review): a password was previously committed here; treat it as
# compromised and rotate it.
db_host = os.environ.get('DB_HOST', 'ec2-3-35-168-11.ap-northeast-2.compute.amazonaws.com')
db_user = os.environ.get('DB_USER', 'admin')
db_password = os.environ['DB_PASSWORD']  # deliberately has no default
db_name = os.environ.get('DB_NAME', 'test1')

def get_db_connection():
    """Open and return a new pymysql connection built from the module-level settings.

    The connection uses ``DictCursor`` as its cursor class. The caller is
    responsible for closing the returned connection.
    """
    settings = {
        'host': db_host,
        'user': db_user,
        'password': db_password,
        'database': db_name,
        'cursorclass': pymysql.cursors.DictCursor,
    }
    return pymysql.connect(**settings)

@app.route('/api/data', methods=['GET'])
def get_data():
    """Return every row of ``table1`` as JSON.

    A new database connection is opened for the request and is closed before
    the response is returned, including when the query fails.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cur:
            cur.execute("SELECT * FROM table1")
            rows = cur.fetchall()
        payload = jsonify(rows)
    finally:
        conn.close()
    return payload

if __name__ == '__main__':
    # Bind to every network interface so the API is reachable from outside the host.
    bind_address, listen_port = '0.0.0.0', 5000
    app.run(host=bind_address, port=listen_port)

ModuleNotFoundError: No module named 'pymysql'

In [None]:
# 필요한 라이브러리 설치
!apt-get install openssh-client

# .pem 파일의 권한 설정 (기본 경로는 /content/)
!chmod 400 /content/key/your-key-pair.pem

# .pem 파일과 전송할 파일을 정의
key_path = '/content/key/your-key-pair.pem'
file_to_send = '/content/your-file.py'
ec2_user = 'ubuntu'
ec2_ip = 'your-ec2-public-ip'

# scp 명령 실행
!scp -i {key_path} {file_to_send} {ec2_user}@{ec2_ip}:/home/ubuntu/


In [None]:
from flask import Flask

# Minimal Flask application used to check that a server starts from the notebook.
app = Flask(__name__)


@app.route('/')
def home():
    """Return the fixed greeting served at the site root."""
    greeting = "Hello, Colab!"
    return greeting


if __name__ == '__main__':
    # Blocks the cell until the server is interrupted.
    app.run()

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m


In [None]:
import os

# The ngrok auth token is read from the environment rather than stored in the
# notebook.
# NOTE(review): a token was previously committed in this cell; treat it as
# compromised and revoke it in the ngrok dashboard.
ngrok_token = os.environ.get("NGROK_AUTHTOKEN")
if not ngrok_token:
    raise RuntimeError("Set the NGROK_AUTHTOKEN environment variable before running this cell.")
ngrok.set_auth_token(ngrok_token)

app = Flask(__name__)

@app.route('/')
def home():
    """Return the fixed greeting served at the site root."""
    return "Hello, Colab!"

if __name__ == '__main__':
    public_url = ngrok.connect(5000)
    print(f'Public URL: {public_url}')

    try:
        # Run the Flask server (blocks until interrupted).
        app.run()
    finally:
        # Close the tunnel when the server stops so re-running the cell does
        # not accumulate open tunnels.
        ngrok.disconnect(public_url.public_url)

Public URL: NgrokTunnel: "https://3a06-35-204-248-197.ngrok-free.app" -> "http://localhost:5000"
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5000
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [06/Jun/2024 05:21:31] "GET / HTTP/1.1" 200 -
INFO:werkzeug:127.0.0.1 - - [06/Jun/2024 05:21:32] "[33mGET /favicon.ico HTTP/1.1[0m" 404 -
