In [69]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense, Dropout

# Load and preprocess data
data = pd.read_csv('valuable_data_lastone.csv')
text_cols = ['position 1', 'position 2', 'experince 1', 'experince 2', 
             'field of studies 1', 'field of studies 2', 'degree 1', 
             'degree 2', 'industry', 'skills', 'influencer', 'country', 'summary']

data[text_cols] = data[text_cols].fillna("Unknown")

# Convert all columns to string type
for col in text_cols:
    data[col] = data[col].astype(str)

text_data = data[text_cols].apply(lambda x: ' '.join(x), axis=1).tolist()
labels = data['characters'].tolist()

# Tokenize and pad sequences
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
sequences = tokenizer.texts_to_sequences(text_data)
max_sequence_length = max(len(seq) for seq in sequences)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Split data
X_train_text, X_test_text, y_train, y_test = train_test_split(
    padded_sequences, labels, test_size=0.1, random_state=42)

# Define and compile the model
model = Sequential([
    Embedding(len(tokenizer.word_index) + 1, 100, input_length=max_sequence_length),
    LSTM(100, dropout=0.2, recurrent_dropout=0.2),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(1, activation='sigmoid')  # Use 'sigmoid' for binary classification
])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_text, y_train, validation_data=(X_test_text, y_test),
          epochs=10, batch_size=32)

# Save the model
model.save('simplified_regularized_model.h5')


ValueError: Failed to find data adapter that can handle input: <class 'numpy.ndarray'>, (<class 'list'> containing values of types {"<class 'str'>"})

In [33]:
import requests
import pandas as pd
import datetime
import csv

positions0=[]
positions1=[]
experinces0=[]
experinces1=[]
skills=[]
influencer=[]
country=[]
field_of_studies0=[]
field_of_studies1=[]
degrees0=[]
degrees1=[]
industries=[]
names=[]
summaries=[]
certifications=[]
# model_crystal\datas\13000_profiles_without_duplicates.csv
df = pd.read_csv('../model_crystal/datas/13000_profiles_without_duplicates.csv')
for csv in range(10):
	res=pd.read_csv(f'../model_crystal/datas/responses/{csv}.csv')
	
	try:
		skills.append(res['skills'])
	except:
		skills.append(None)
	try:
		position0=res['position_groups'][0]['profile_positions'][0]['title']
		print(position0)
		positions0.append(position0)
	except:
		positions0.append(None)

	try:
		position1=res['position_groups'][1]['profile_positions'][0]['title']
		positions1.append(position1)
	except:
		positions1.append(None)

	experince0=0
	try:
		experince0=res['position_groups'][0]['date']['start']['year']-res['position_groups'][0]['date']['end']['year']
		experinces0.append(abs(experince0))
	except:
		try:
			experince0=datetime.date.today().year - res['position_groups'][0]['date']['start']['year']
			experinces0.append(abs(experince0))
		except:
			experinces0.append(None)

	experince1=0
	try:
		experince1=res['position_groups'][1]['date']['start']['year']-res['position_groups'][1]['date']['end']['year']
		experinces1.append(abs(experince1))
	except:
		try:
			experince1=datetime.date.today().year - res['position_groups'][1]['date']['start']['year']
			experinces1.append(abs(experince1))
		except:
			experinces1.append(None)
				
	try:
		influencer.append(res['influencer'])
	except:
		influencer.append(None)
	try:
		country.append(res['location']['country'])
	except:
		country.append(None)
	field_of_study0=""
	try:
		field_of_study0=res['education'][0]['field_of_study']
		field_of_studies0.append(field_of_study0)
	except:
		field_of_study0=None
		field_of_studies0.append(field_of_study0)

	try:
		field_of_study1=res['education'][1]['field_of_study']
		field_of_studies1.append(field_of_study1)
	except:
		field_of_study1=None
		field_of_studies1.append(field_of_study1)

	try:
		degree_name0=res['education'][0]['degree_name']
		degrees0.append(degree_name0)
	except:
		degree_name0=None
		degrees0.append(degree_name0)

	try:
		degree_name1=res['education'][1]['degree_name']
		degrees1.append(degree_name1)
	except:
		degree_name1=None
		degrees1.append(degree_name1)

	try:
		industries.append(res['industry'])
	except:
		industries.append(None)
	try:
		name=res['first_name']+" "+res['last_name']
		names.append(name)
		print(name)
	except:
		names.append(None)
	try:
		if len(res[certifications])==0:
			certifications.append(None)
		for certify in res['certifications']:
			certifications.append(certify['name'])
	except:
		certifications.append(None)
	try:
		summaries.append(res['summary'])
	except:
		summaries.append(None)

df2=pd.DataFrame({
	"name":names,
	'position 1':positions0,
	'position 2':positions1,
	"experince 1":experinces0,
	"experince 2":experinces1,
	"field of studies 1":field_of_studies0,
	"field of studies 2":field_of_studies1,
	"degree 1":degrees0,
	"degree 2":degrees1,
	"industry":industries,
	"certifications":certifications,
	"skills":skills,
	"influencer":influencer,
	"country":country,
	"summary":summaries,
})
d = pd.concat([df, df2], axis=1)
d.to_csv('valuable_data_last.csv')

0    Laurel Rohloff
dtype: object
0    Merel Keijzer
dtype: object
0    Amy MacGregor
dtype: object
0    Sarah Johnson
dtype: object
0    Annemarie Voyce
dtype: object
0    Elise Adayme
dtype: object
0    Aidan Quinn
dtype: object
0    Megan Hershey
dtype: object
0    Andreea (Andi) Dumitru
dtype: object
0    Claire Niven
dtype: object
max_sequence------>>>>519
Text: Frontend mentor 	Predicted Label: Analyst (C)
Text: Javascript Developer	Predicted Label: Driver (Di);
Text: 0	Predicted Label: Analyst (C)
Text: Petroleum Engineering	Predicted Label: Captain (D)
Text: Oil and Gas	Predicted Label: Analyst (C)
Text: Master of Science - MS	Predicted Label: Driver (Di);
Text: Bachelor's degree	Predicted Label: Analyst (C)
Text: Computer Software	Predicted Label: Encourager (Is)
Text: ['Next.js', 'Python (Programming Language)', 'Django', 'REST APIs', 'Problem Solving', 'Responsive Web Design', 'Front-End Development', 'Firebase', 'Web Development', 'Communication', 'Contract Negotiation', 'C

In [32]:
df2

Unnamed: 0,name,position 1,position 2,experince 1,experince 2,field of studies 1,field of studies 2,degree 1,degree 2,industry,certifications,skills,influencer,country,summary
0,0 Laurel Rohloff dtype: object,,,,,,,,,0 Primary/Secondary Education Name: industr...,,"0 ['Parler en public', 'Réseaux sociaux', '...","0 False Name: influencer, dtype: bool",,"0 NaN Name: summary, dtype: float64"
1,0 Merel Keijzer dtype: object,,,,,,,,,"0 Research Name: industry, dtype: object",,"0 ['Statistical Analysis of fMRI Data', 'St...","0 False Name: influencer, dtype: bool",,0 I am a second language development and bi...
2,0 Amy MacGregor dtype: object,,,,,,,,,"0 Marketing & Advertising Name: industry, d...",,"0 ['Proofreading', 'Press Releases', 'Publi...","0 False Name: influencer, dtype: bool",,0 English teacher with 5 years experience w...
3,0 Sarah Johnson dtype: object,,,,,,,,,"0 Higher Education Name: industry, dtype: o...",,"0 ['Public Speaking', 'Coaching', 'Teaching...","0 False Name: influencer, dtype: bool",,0 Over a period of more than 10 years of la...
4,0 Annemarie Voyce dtype: object,,,,,,,,,"0 Higher Education Name: industry, dtype: o...",,"0 ['Business English', 'Secondary Education...","0 False Name: influencer, dtype: bool",,0 I previously worked as a secondary school...
5,0 Elise Adayme dtype: object,,,,,,,,,0 Translation & Localization Name: industry...,,"0 ['Translation', 'English', 'French', 'Pre...","0 False Name: influencer, dtype: bool",,0 Experienced freelance translator and inte...
6,0 Aidan Quinn dtype: object,,,,,,,,,0 Primary/Secondary Education Name: industr...,,"0 ['Public Speaking', 'Research', 'Manageme...","0 False Name: influencer, dtype: bool",,0 I am a qualified secondary school teacher...
7,0 Megan Hershey dtype: object,,,,,,,,,0 Translation & Localization Name: industry...,,"0 ['Translation', 'English', 'Dutch', 'Text...","0 False Name: influencer, dtype: bool",,0 I’m a professional Dutch-to-English trans...
8,0 Andreea (Andi) Dumitru dtype: object,,,,,,,,,0 Professional Training & Coaching Name: in...,,"0 ['E-Learning', 'Blended Learning', 'Leade...","0 False Name: influencer, dtype: bool",,0 Does your speech resonate with your peers...
9,0 Claire Niven dtype: object,,,,,,,,,0 Translation & Localization Name: industry...,,"0 ['Translation', 'Copy Editing', 'English ...","0 False Name: influencer, dtype: bool",,0 You want your English copy to sound as go...


In [25]:
d

Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,names,links,characters,name,position 1,position 2,experince 1,experince 2,field of studies 1,field of studies 2,degree 1,degree 2,industry,certifications,skills,influencer,country,summary
0,0,0.0,Laurel Rohloff,https://www.linkedin.com/in/laurel-rohloff-a52...,Counselor (Si),0 Laurel Rohloff dtype: object,,,,,,,,,0 Primary/Secondary Education Name: industr...,,"0 ['Parler en public', 'Réseaux sociaux', '...","0 False Name: influencer, dtype: bool",,"0 NaN Name: summary, dtype: float64"
1,1,1.0,Merel Keijzer,https://www.linkedin.com/in/merel-keijzer-7263...,Influencer (Id),0 Merel Keijzer dtype: object,,,,,,,,,"0 Research Name: industry, dtype: object",,"0 ['Statistical Analysis of fMRI Data', 'St...","0 False Name: influencer, dtype: bool",,0 I am a second language development and bi...
2,2,2.0,Amy Macgregor,https://www.linkedin.com/in/amy-macgregor-5325...,Supporter (S),0 Amy MacGregor dtype: object,,,,,,,,,"0 Marketing & Advertising Name: industry, d...",,"0 ['Proofreading', 'Press Releases', 'Publi...","0 False Name: influencer, dtype: bool",,0 English teacher with 5 years experience w...
3,3,3.0,Sarah Johnson,https://www.linkedin.com/in/sarah-johnson-54a4...,Influencer (Id),0 Sarah Johnson dtype: object,,,,,,,,,"0 Higher Education Name: industry, dtype: o...",,"0 ['Public Speaking', 'Coaching', 'Teaching...","0 False Name: influencer, dtype: bool",,0 Over a period of more than 10 years of la...
4,4,4.0,Anne Voyce,https://www.linkedin.com/in/avoyce,Motivator (I),0 Annemarie Voyce dtype: object,,,,,,,,,"0 Higher Education Name: industry, dtype: o...",,"0 ['Business English', 'Secondary Education...","0 False Name: influencer, dtype: bool",,0 I previously worked as a secondary school...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13086,13086,,Tosin Obayuwana,https://www.linkedin.com/in/tosin-obayuwana-a3...,Initiator (DI),,,,,,,,,,,,,,,
13087,13087,,Carolina Montenegro,https://www.linkedin.com/in/carolina-montenegr...,Encourager (Is),,,,,,,,,,,,,,,
13088,13088,,Cristyna Montenegro,https://www.linkedin.com/in/cristynamontenegro,Analyst (C),,,,,,,,,,,,,,,
13089,13089,,Lorena Montenegro,https://www.linkedin.com/in/lorena-montenegro-...,Editor (Cs),,,,,,,,,,,,,,,


In [9]:
df=pd.read_csv('../model_crystal/valuable_data_last.csv')

In [23]:
res['position_groups'][0]['profile_positions'][0]['title']

'Frontend mentor '

In [10]:
df

Unnamed: 0.3,Unnamed: 0.2,Unnamed: 0.1,Unnamed: 0,names,links,characters,name,position 1,position 2,experince 1,...,field of studies 1,field of studies 2,degree 1,degree 2,industry,certifications,skills,influencer,country,summary
0,0,0,0.0,Laurel Rohloff,https://www.linkedin.com/in/laurel-rohloff-a52...,Counselor (Si),0 Laurel Rohloff\ndtype: object,,,,...,,,,,0 Primary/Secondary Education\nName: indust...,,"0 ['Parler en public', 'Réseaux sociaux', '...","0 False\nName: influencer, dtype: bool",,"0 NaN\nName: summary, dtype: float64"
1,1,1,1.0,Merel Keijzer,https://www.linkedin.com/in/merel-keijzer-7263...,Influencer (Id),0 Merel Keijzer\ndtype: object,,,,...,,,,,"0 Research\nName: industry, dtype: object",,"0 ['Statistical Analysis of fMRI Data', 'St...","0 False\nName: influencer, dtype: bool",,0 I am a second language development and bi...
2,2,2,2.0,Amy Macgregor,https://www.linkedin.com/in/amy-macgregor-5325...,Supporter (S),0 Amy MacGregor\ndtype: object,,,,...,,,,,"0 Marketing & Advertising\nName: industry, ...",,"0 ['Proofreading', 'Press Releases', 'Publi...","0 False\nName: influencer, dtype: bool",,0 English teacher with 5 years experience w...
3,3,3,3.0,Sarah Johnson,https://www.linkedin.com/in/sarah-johnson-54a4...,Influencer (Id),0 Sarah Johnson\ndtype: object,,,,...,,,,,"0 Higher Education\nName: industry, dtype: ...",,"0 ['Public Speaking', 'Coaching', 'Teaching...","0 False\nName: influencer, dtype: bool",,0 Over a period of more than 10 years of la...
4,4,4,4.0,Anne Voyce,https://www.linkedin.com/in/avoyce,Motivator (I),0 Annemarie Voyce\ndtype: object,,,,...,,,,,"0 Higher Education\nName: industry, dtype: ...",,"0 ['Business English', 'Secondary Education...","0 False\nName: influencer, dtype: bool",,0 I previously worked as a secondary school...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13086,13086,13086,,Tosin Obayuwana,https://www.linkedin.com/in/tosin-obayuwana-a3...,Initiator (DI),0 Tosin Obayuwana\ndtype: object,,,,...,,,,,"0 Management Consulting\nName: industry, dt...",,"0 ['Business Analysis', 'Team Management', ...","0 False\nName: influencer, dtype: bool",,0 I am a goal getter that have not been re...
13087,13087,13087,,Carolina Montenegro,https://www.linkedin.com/in/carolina-montenegr...,Encourager (Is),0 Carolina Montenegro\ndtype: object,,,,...,,,,,"0 International Affairs\nName: industry, dt...",,"0 ['Journalism', 'International Relations',...","0 False\nName: influencer, dtype: bool",,"0 NaN\nName: summary, dtype: float64"
13088,13088,13088,,Cristyna Montenegro,https://www.linkedin.com/in/cristynamontenegro,Analyst (C),0 Cristyna Montenegro\ndtype: object,,,,...,,,,,"0 Research\nName: industry, dtype: object",,"0 ['C++', 'Programming', 'Logic Programming...","0 False\nName: influencer, dtype: bool",,0 Civil Engineer with a master's degree in ...
13089,13089,13089,,Lorena Montenegro,https://www.linkedin.com/in/lorena-montenegro-...,Editor (Cs),0 Lorena Montenegro \ndtype: object,,,,...,,,,,"0 Construction\nName: industry, dtype: object",,"0 ['Gestión de proyectos', 'Microsoft Offic...","0 False\nName: influencer, dtype: bool",,0 Master student of Building Technology at ...


In [8]:
skills

[['Next.js',
  'Python (Programming Language)',
  'Django',
  'REST APIs',
  'Problem Solving',
  'Responsive Web Design',
  'Front-End Development',
  'Firebase',
  'Web Development',
  'Communication',
  'Contract Negotiation',
  'Construction',
  'Management',
  'Analytical Skills',
  'Software as a Service (SaaS)',
  'Translation',
  'Technical Translation',
  'English Translation',
  'Russian Translation',
  'Redux.js']]

SyntaxError: invalid syntax (4194720447.py, line 1)

In [None]:
=[]
experinces0=[]
experinces1=[]
skills=[]
influencer=[]
country=[]
field_of_studies0=[]
field_of_studies1=[]
degrees0=[]
degrees1=[]
industries=[]
names=[]
summaries=[]
certifications=[]

In [53]:
import requests
import pandas as pd
import datetime
import csv
import json

positions0=[]
positions1=[]
experinces0=[]
experinces1=[]
skills=[]
influencer=[]
country=[]
field_of_studies0=[]
field_of_studies1=[]
degrees0=[]
degrees1=[]
industries=[]
names=[]
summaries=[]
certifications=[]
# model_crystal\datas\13000_profiles_without_duplicates.csv
df = pd.read_csv('../model_crystal/datas/13000_profiles_without_duplicates.csv')
for csv in range(13091):
	# res=pd.read_json(f'../model_crystal/datas/json_responses/{csv}.json')
	# res=pd.read_csv(f'../model_crystal/datas/responses/{csv}.csv')
	try:
		with open(f'../model_crystal/datas/json_responses/{csv}.json') as f:
			res = json.load(f)
	except:
		print("NAN")
	try:
		skills.append(res['skills'])
	except:
		skills.append(None)
	try:
		position0=res['position_groups'][0]['profile_positions'][0]['title']
		positions0.append(position0)
	except:
		positions0.append(None)

	try:
		position1=res['position_groups'][1]['profile_positions'][0]['title']
		positions1.append(position1)
	except:
		positions1.append(None)

	experince0=0
	try:
		experince0=res['position_groups'][0]['date']['start']['year']-res['position_groups'][0]['date']['end']['year']
		experinces0.append(abs(experince0))
	except:
		try:
			experince0=datetime.date.today().year - res['position_groups'][0]['date']['start']['year']
			experinces0.append(abs(experince0))
		except:
			experinces0.append(None)

	experince1=0
	try:
		experince1=res['position_groups'][1]['date']['start']['year']-res['position_groups'][1]['date']['end']['year']
		experinces1.append(abs(experince1))
	except:
		try:
			experince1=datetime.date.today().year - res['position_groups'][1]['date']['start']['year']
			experinces1.append(abs(experince1))
		except:
			experinces1.append(None)
				
	try:
		influencer.append(res['influencer'])
	except:
		influencer.append(None)
	try:
		country.append(res['location']['country'])
	except:
		country.append(None)
	field_of_study0=""
	try:
		field_of_study0=res['education'][0]['field_of_study']
		field_of_studies0.append(field_of_study0)
	except:
		field_of_study0=None
		field_of_studies0.append(field_of_study0)

	try:
		field_of_study1=res['education'][1]['field_of_study']
		field_of_studies1.append(field_of_study1)
	except:
		field_of_study1=None
		field_of_studies1.append(field_of_study1)

	try:
		degree_name0=res['education'][0]['degree_name']
		degrees0.append(degree_name0)
	except:
		degree_name0=None
		degrees0.append(degree_name0)

	try:
		degree_name1=res['education'][1]['degree_name']
		degrees1.append(degree_name1)
	except:
		degree_name1=None
		degrees1.append(degree_name1)

	try:
		industries.append(res['industry'])
	except:
		industries.append(None)
	try:
		name=res['first_name']+" "+res['last_name']
		names.append(name)
	except:
		names.append(None)
	try:
		if len(res[certifications])==0:
			certifications.append(None)
		for certify in res['certifications']:
			certifications.append(certify['name'])
	except:
		certifications.append(None)
	try:
		summaries.append(res['summary'])
	except:
		summaries.append(None)

df2=pd.DataFrame({
	"name":names,
	'position 1':positions0,
	'position 2':positions1,
	"experince 1":experinces0,
	"experince 2":experinces1,
	"field of studies 1":field_of_studies0,
	"field of studies 2":field_of_studies1,
	"degree 1":degrees0,
	"degree 2":degrees1,
	"industry":industries,
	"certifications":certifications,
	"skills":skills,
	"influencer":influencer,
	"country":country,
	"summary":summaries,
})
d = pd.concat([df, df2], axis=1)
d.to_csv('valuable_data_last.csv')

Laurel Rohloff
Merel Keijzer
Amy MacGregor
Sarah Johnson
Annemarie Voyce
Elise Adayme
Aidan Quinn
Megan Hershey
Andreea (Andi) Dumitru
Claire Niven
Charlie Shader
Deborah Knight
Francesca Vane
Yesica Natalia Chappe
Karen Yaworski, Ph.D 
Dominique Broady, English Marketing Translation Expert
Lilian Sit
Rebeca S. G.
Moira Powers
Marike Duizendstra-Wolters
Ali Shalileh
Ebony Burnside
Cata Amaro Madariaga, M.A.
Keith Martin
Carlos José González Largo
Shahnoza Shamsitdinova
Christine Gardner
Jan Leonard E.
Buffi Duberman
Tom Johnston
Maya Zupnik
Bianca Pellet
Anna Machejko
Siji Jabbar
Nadia van den Berg, MA
Dimitrios Stergioulis
Lauren Rahman
Joy Burrough-Boenisch
Sandra Wormgoor
Serena Lyon
Katharina Pabst
Valerie Graef
Janne van Zanen
Ami Cormack
Karen Hopp
Lucia Grajeda
Kemi Obakin
Erman Gultop
Jitka M.
Agnieszka Karolina Brzozowska
Sandra Norman
Xenia Stroi
Claire Schuyffel
Jackie Wilson
Emily Suff
Sanaz Khelghati
Fransy Vasilaki
Brenda de Jong-Pauley, M.A.
Sandarenu Kumarasamy
Deborah 

In [65]:
# d=d.drop(['Unnamed: 0.1', 'Unnamed: 0'], axis=1)
d.to_csv('valuable_data_lastone.csv')

In [66]:
d

Unnamed: 0,names,links,characters,name,position 1,position 2,experince 1,experince 2,field of studies 1,field of studies 2,degree 1,degree 2,industry,certifications,skills,influencer,country,summary
0,Laurel Rohloff,https://www.linkedin.com/in/laurel-rohloff-a52...,Counselor (Si),Laurel Rohloff,French and English teacher,University Lecturer,2.0,1.0,Linguistics,Études françaises,Master's degree,Bachelor's degree,Primary/Secondary Education,,"[Parler en public, Réseaux sociaux, Planificat...",False,Netherlands,
1,Merel Keijzer,https://www.linkedin.com/in/merel-keijzer-7263...,Influencer (Id),Merel Keijzer,Professor of English Linguistics and English a...,Assistant Professor of Applied Linguistics,,10.0,,Linguistics,Academic Teaching Qualification (BKO),Doctoral degree Linguistics,Research,,"[Statistical Analysis of fMRI Data, Statistica...",False,Netherlands,I am a second language development and bilingu...
2,Amy Macgregor,https://www.linkedin.com/in/amy-macgregor-5325...,Supporter (S),Amy MacGregor,English Teacher (IB MYP),Magazine Translator,7.0,3.0,Education,English/Language Arts Teacher Education,International Post Graduate Certificate in Edu...,CELTA (Certificate in Teaching English to Spea...,Marketing & Advertising,,"[Proofreading, Press Releases, Public Speaking...",False,Netherlands,English teacher with 5 years experience workin...
3,Sarah Johnson,https://www.linkedin.com/in/sarah-johnson-54a4...,Influencer (Id),Sarah Johnson,Youtuber,Private Tutor English - specialist in solving ...,6.0,14.0,The teaching of French and German,,Post Graduate Certificate of Education in Mode...,BA (Hons) European Business Studies,Higher Education,,"[Public Speaking, Coaching, Teaching, Dutch, E...",False,Netherlands,Over a period of more than 10 years of languag...
4,Anne Voyce,https://www.linkedin.com/in/avoyce,Motivator (I),Annemarie Voyce,Teaching Dutch and English as a foreign langua...,English Teacher,0.0,6.0,English; Literature and Visuality,Teaching English as a Second or Foreign Langua...,Master's degree,120-hour Premier TEFL course,Higher Education,,"[Business English, Secondary Education, Teachi...",False,United Kingdom,I previously worked as a secondary school Engl...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
13086,Tosin Obayuwana,https://www.linkedin.com/in/tosin-obayuwana-a3...,Initiator (DI),Tosin Obayuwana,Executive Coordinator,CEO,1.0,12.0,,,,,Management Consulting,,"[Business Analysis, Team Management, Digital M...",False,Nigeria,I am a goal getter that have not been restric...
13087,Carolina Montenegro,https://www.linkedin.com/in/carolina-montenegr...,Encourager (Is),Carolina Montenegro,Advocacy Protection Officer (NEAR/Network for ...,Advocacy 4 Protection expert (NORCAP),1.0,0.0,,,Executive Certificate in Evidence in Public Po...,Executive Certificate in Advocacy in Internati...,International Affairs,,"[Journalism, International Relations, Media Re...",False,Netherlands,
13088,Cristyna Montenegro,https://www.linkedin.com/in/cristynamontenegro,Analyst (C),Cristyna Montenegro,Test Engineer,Engineering Intern,7.0,1.0,Structural Engineering,Civil Engineering,Master of Science (M.Sc.),Bachelor's degree,Research,,"[C++, Programming, Logic Programming, Structur...",False,Netherlands,Civil Engineer with a master's degree in Struc...
13089,Lorena Montenegro,https://www.linkedin.com/in/lorena-montenegro-...,Editor (Cs),Lorena Montenegro,"Senior Sustainability, Health and Well-being c...",Graduation thesis - Internship,4.0,1.0,Building technology- Building physics,disciplina academica,Master of Science - Building Technology,Grado en Arquitectura,Construction,,"[Gestión de proyectos, Microsoft Office, AutoC...",False,Netherlands,Master student of Building Technology at the D...


In [52]:
import json
with open('../model_crystal/datas/json_responses/732.json') as f:
  data = json.load(f)

JSONDecodeError: Expecting value: line 1 column 1 (char 0)

In [42]:
res=pd.read_json(f'../model_crystal/datas/json_responses/0.json')

ValueError: All arrays must be of the same length

In [47]:
res=data
res['position_groups'][0]['profile_positions'][0]

{'location': 'Almere, Flevoland, Pays-Bas',
 'date': {'start': {'month': 8, 'day': None, 'year': 2021},
  'end': {'month': None, 'day': None, 'year': None}},
 'company': 'International School of Almere',
 'description': None,
 'title': 'French and English teacher',
 'employment_type': 'Contract'}

In [1]:
import datetime
import requests
import numpy as np
import json
import pandas as pd
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing.text import Tokenizer
# from tensorflow.keras.preprocessing.sequence import pad_sequences
data=pd.read_csv("./partly_45878_reach_50.csv")
for i in range(5000, 10000):
  link=data['links'][i]
  id = link.split('/')[4]
  url = "https://api.iscraper.io/v2/profile-details"

  payload = {
    'profile_id': id,
  }

  headers = {
    'X-API-KEY': 'YLPyKt7fl3n05qOsf5rTkoQqWab3xGR6',
  }

  response = requests.post(url, json=payload, headers=headers)
  res=response.json()

  with open(f"./datas/json_responses_50k/{i}.json", "w") as json_file:
      json.dump(res, json_file, indent=4)
  print(i)

5000
5001
5002
5003
5004
5005
5006
5007
5008
5009
5010
5011
5012
5013
5014
5015
5016
5017
5018
5019
5020
5021
5022
5023
5024
5025
5026
5027
5028
