In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

# Load the data
data = pd.read_csv('https://raw.githubusercontent.com/fayyoz24/Bots_using_selenium/main/model_crystal/without%20certifications%20linkedin%20profiles.csv')

# Convert non-string values to strings in the feature columns
text_features = data[['position 1', 'position 2', 'field of studies 1', 'field of studies 2', 'degree 1', 'degree 2', 'industry', 'skills', 'influencer', 'country']].copy()

# Handle non-string values in each column
for column in text_features.columns:
    text_features[column] = text_features[column].astype(str)

# Combine all text features into a single string column
text_data = text_features.apply(lambda x: ' '.join(x), axis=1).tolist()
labels = data['characters'].tolist()

# Convert labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(labels))}
y = np.array([label_mapping[label] for label in labels])

# Update label mapping to start from 0
label_mapping = {label: idx for label, idx in label_mapping.items()}
num_classes = len(label_mapping)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
word_index = tokenizer.word_index
vocab_size = len(word_index) + 1
sequences = tokenizer.texts_to_sequences(text_data)

# Pad sequences to have consistent length
max_sequence_length = max(len(seq) for seq in sequences)
print(f"max_sequence------>>>>{max_sequence_length}")
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Convert the data to NumPy arrays
X_text = np.array(padded_sequences)

# Split the data into training and testing sets
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X_text, y, test_size=0.2, random_state=42)

# Textual input branch
text_input = Input(shape=(max_sequence_length,))
embedding_layer = Embedding(vocab_size, 100, input_length=max_sequence_length)(text_input)
lstm_layer = LSTM(100)(embedding_layer)
output_layer = Dense(num_classes, activation='softmax')(lstm_layer)

# Define the model
model = Model(inputs=text_input, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_text, y_train, validation_data=(X_test_text, y_test),
          epochs=10, batch_size=32)

# Save the model
model.save('model.h5')


ModuleNotFoundError: No module named 'numpy'

In [9]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense

# Load the data
data = pd.read_csv('https://raw.githubusercontent.com/fayyoz24/Bots_using_selenium/main/model_crystal/without%20certifications%20linkedin%20profiles.csv')
cols=['position 1', 'position 2', 'field of studies 1','experince 1', 'experince 2', 'field of studies 2',
       'degree 1', 'degree 2', 'industry', 'skills', 'influencer', 'country']

for col in cols:
  data[col] = data[col].fillna("Unknown")
# Convert non-string values to strings in the feature columns
text_features = data[['position 1', 'position 2', "experince 1","experince 2", 
                      'field of studies 1', 'field of studies 2', 
                      'degree 1', 'degree 2', 'industry', 'skills',
                      'influencer', 'country', 'summary']].copy()

# Handle non-string values in each column
for column in text_features.columns:
    text_features[column] = text_features[column].astype(str)

# Combine all text features into a single string column
text_data = text_features.apply(lambda x: ' '.join(x), axis=1).tolist()
labels = data['characters'].tolist()

# Convert labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(labels))}
y = np.array([label_mapping[label] for label in labels])

# Update label mapping to start from 0
label_mapping = {label: idx for label, idx in label_mapping.items()}
num_classes = len(label_mapping)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
word_index = tokenizer.word_index
vocab_size = len(word_index) + 1
sequences = tokenizer.texts_to_sequences(text_data)

# Pad sequences to have consistent length
max_sequence_length = max(len(seq) for seq in sequences)
print(max_sequence_length)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Convert the data to NumPy arrays
X_text = np.array(padded_sequences)

# Split the data into training and testing sets
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X_text, y, test_size=0.1, random_state=42)

# Textual input branch
text_input = Input(shape=(max_sequence_length,))
embedding_layer = Embedding(vocab_size, 100, input_length=max_sequence_length)(text_input)
lstm_layer = LSTM(100)(embedding_layer)
output_layer = Dense(num_classes, activation='softmax')(lstm_layer)

# Define the model
model = Model(inputs=text_input, outputs=output_layer)

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Train the model
model.fit(X_train_text, y_train, validation_data=(X_test_text, y_test),
          epochs=50, batch_size=32)

# Save the model
model.save('model2_test_size=0.1.h5')


523
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [5]:
import datetime
import requests
import numpy as np
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

positions0=[]
positions1=[]
experinces0=[]
experinces1=[]
skills=[]
influencer=[]
country=[]
field_of_studies0=[]
field_of_studies1=[]
degrees0=[]
degrees1=[]
industries=[]
names=[]
summaries=[]
certifications=[]

link="https://www.linkedin.com/in/ismatulla-kuyliev-43286a199/"
id = link.split('/')[4]
url = "https://api.iscraper.io/v2/profile-details"

payload = {
  'profile_id': id,
}

headers = {
  'X-API-KEY': 'hVSqiv11cY1W5YUawXUDLBn0jb4G5W44',
}

response = requests.post(url, json=payload, headers=headers)
res=response.json()
try:
  skills.append(res['skills'])
except:
  skills.append("Unknown")
position0=""
try:
  position0=res['position_groups'][0]['profile_positions'][0]['title']
  positions0.append(position0)
except:
  positions0.append("Unknown")

position1=""
try:
  position1=res['position_groups'][1]['profile_positions'][0]['title']
  positions1.append(position1)
except:
  positions1.append("Unknown")

experince0=0
try:
  experince0=res['position_groups'][0]['date']['start']['year']-res['position_groups'][0]['date']['end']['year']
  experinces0.append(experince0)
except:
  try:
    experince0=datetime.date.today().year - res['position_groups'][0]['date']['start']['year']
    experinces0.append(experince0)
  except:
    experinces0.append("Unknown")

experince1=0
try:
  experince1=res['position_groups'][1]['date']['start']['year']-res['position_groups'][1]['date']['end']['year']
  experinces1.append(experince1.abs())
except:
  try:
    experince1=datetime.date.today().year - res['position_groups'][1]['date']['start']['year']
    experinces1.append(experince1.abs())
  except:
    experinces1.append("Unknown")
      
try:
  influencer.append(res['influencer'])
except:
  influencer.append("Unknown")
try:
  country.append(res['location']['country'])
except:
  country.append("Unknown")
field_of_study0=""
try:
  field_of_study0=res['education'][0]['field_of_study']
  field_of_studies0.append(field_of_study0)
except:
  field_of_study0="Unknown"
  field_of_studies0.append(field_of_study0)

field_of_study1=""
try:
  field_of_study1=res['education'][1]['field_of_study']
  field_of_studies1.append(field_of_study1)
except:
  field_of_study1="Unknown"
  field_of_studies1.append(field_of_study1)

degree_name0=''
try:
  degree_name0=res['education'][0]['degree_name']
  degrees0.append(degree_name0)
except:
  degree_name0="Unknown"
  degrees0.append(degree_name0)

degree_name1=''
try:
  degree_name1=res['education'][1]['degree_name']
  degrees1.append(degree_name1)
except:
  degree_name1="Unknown"
  degrees1.append(degree_name1)

try:
  industries.append(res['industry'])
except:
  industries.append("Unknown")

try:
  if res['summary']:
    summaries.append(res['summary'])
  else:
    summaries.append("Unknown")
except:
  summaries.append("Unknown")

# Load the saved model
model = load_model('model.h5')
a= [
    positions0[0],
    positions1[0],
    skills[0],
    field_of_studies0[0],
    field_of_studies1[0],
    degrees0[0],
    degrees1[0],
    industries[0],
    summaries[0]
]

new_text_data=[]
for i in a:
    if i != "Unknown":
        new_text_data.append(i)
tokenizer = Tokenizer()
tokenizer.fit_on_texts(new_text_data)
word_index = tokenizer.word_index
vocab_size = len(word_index) + 1
sequences = tokenizer.texts_to_sequences(new_text_data)

# Pad sequences to have consistent length
max_sequence_length = 519
print(f"max_sequence------>>>>{max_sequence_length}")
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)
# Preprocess the new text data
new_sequences = tokenizer.texts_to_sequences(new_text_data)
new_padded_sequences = pad_sequences(new_sequences, maxlen=max_sequence_length)

# Make predictions
predictions = model.predict(new_padded_sequences)

# Get the predicted labels
predicted_labels = np.argmax(predictions, axis=1)

# Map the predicted labels back to original labels
label_mapping={'Analyst (C)': 0, 'Questioner (CD)': 1, 'Captain (D)': 2, 'Encourager (Is)': 3, 'Stabilizer (SC)': 4, 
  'Supporter (S)': 5, 'Counselor (Si)': 6, 'Editor (Cs)': 7, 'Motivator (I)': 8, 'Driver (Di)': 9,
  'Planner (Sc)': 10, 'Skeptic (Cd)': 11, 'Architect (Dc)': 12, 'Driver (Di);': 13, 
  'Influencer (Id)': 14, 'Harmonizer (IS)': 15, 'Initiator (DI)': 16}
reverse_label_mapping = {idx: label for label, idx in label_mapping.items()}
predicted_labels = [reverse_label_mapping[label] for label in predicted_labels]

# Print the predictions
for text, label in zip(new_text_data, predicted_labels):
    print(f"Text: {text}\tPredicted Label: {label}")
print(max(set(predicted_labels), key = predicted_labels.count))

max_sequence------>>>>519
Text: Frontend mentor 	Predicted Label: Supporter (S)
Text: Fullstack | javaScript developer	Predicted Label: Analyst (C)
Text: 0	Predicted Label: Supporter (S)
Text: Petroleum Engineering	Predicted Label: Analyst (C)
Text: Oil and Gas	Predicted Label: Captain (D)
Text: Master of Science - MS	Predicted Label: Analyst (C)
Text: Bachelor's degree	Predicted Label: Editor (Cs)
Text: Computer Software	Predicted Label: Supporter (S)
Text: ['Python (Programming Language)', 'Django', 'REST APIs', 'Problem Solving', 'Responsive Web Design', 'Front-End Development', 'Firebase', 'Web Development', 'Communication', 'Contract Negotiation', 'Construction', 'Management', 'Analytical Skills', 'Software as a Service (SaaS)', 'Translation', 'Technical Translation', 'English Translation', 'Russian Translation', 'Redux.js', 'Git']	Predicted Label: Analyst (C)
Text: I am a skilled Frontend Developer with over 1.5 years of experience and a proven track record of delivering high-qua

In [15]:
predicted_labels

['Supporter (S)',
 'Motivator (I)',
 'Harmonizer (IS)',
 'Questioner (CD)',
 'Counselor (Si)',
 'Supporter (S)',
 'Harmonizer (IS)',
 'Supporter (S)',
 'Harmonizer (IS)',
 'Influencer (Id)']

In [None]:
# new_text_data = [
#     positions0[0],
#     positions1[0],
#     str(experinces0[0]),
#     str(experinces1[0]),
#     field_of_studies0[0],
#     field_of_studies1[0],
#     degrees0[0],
#     degrees1[0],
#     industries[0],
#     skills[0],
#     # str(influencer[0]),
#     # country[0],
#     summaries[0]

# ]


In [59]:
import numpy as np
from tensorflow.keras.models import load_model

# Load the saved model
model = load_model('model.h5')
a= [
    positions0[0],
    positions1[0],
    str(experinces0[0]),
    str(experinces1[0]),
    field_of_studies0[0],
    field_of_studies1[0],
    degrees0[0],
    degrees1[0],
    industries[0],
    skills[0],
    summaries[0]
]
new_text_data=[]
for i in a:
    if i != "Unknown" and i != None:
        new_text_data.append(i)

# Preprocess the new text data
new_sequences = tokenizer.texts_to_sequences(new_text_data)
new_padded_sequences = pad_sequences(new_sequences, maxlen=max_sequence_length)

# Make predictions
predictions = model.predict(new_padded_sequences)

# Get the predicted labels
predicted_labels = np.argmax(predictions, axis=1)

# Map the predicted labels back to original labels
reverse_label_mapping = {idx: label for label, idx in label_mapping.items()}
predicted_labels = [reverse_label_mapping[label] for label in predicted_labels]

# Print the predictions
for text, label in zip(new_text_data, predicted_labels):
    print(f"Text: {text}\tPredicted Label: {label}")
print(max(set(predicted_labels), key = predicted_labels.count))


Text: Sustainability Partner to Boards - Director Marketing & Sales	Predicted Label: Encourager (Is)
Text: Founder	Predicted Label: Captain (D)
Text: 14	Predicted Label: Driver (Di);
Text: Marketing	Predicted Label: Motivator (I)
Text: Post-doctorate degree in Brand Management	Predicted Label: Architect (Dc)
Text: Environmental Services	Predicted Label: Captain (D)
Text: ['Leadership Development', 'Investments', 'Strategy', 'Leadership', 'Sustainability Consulting', 'Marketing Strategy', 'Business Strategy', 'Marketing', 'B2B', 'New Business Development', 'Business Development', 'Marketing Management', 'Entrepreneurship', 'Start-ups', 'Project Planning', 'Supply Chain', 'Management Consulting', 'Change Management', 'FMCG', 'Strategic Planning']	Predicted Label: Motivator (I)
Text: Committed to help leaders at all levels transition their organisations towards businesses that are a force for good... leaving no footprint.

Main fields of expertise: Strategy, Transformation, Marketing & Co

In [46]:
max(set(predicted_labels), key = predicted_labels.count)

'Editor (Cs)'

In [45]:
predicted_labels

['Editor (Cs)',
 'Supporter (S)',
 'Supporter (S)',
 'Editor (Cs)',
 'Editor (Cs)',
 'Captain (D)',
 'Editor (Cs)']

In [26]:
# positions0[0]
# positions1[0]
# str(experinces0[0])
# str(experinces1[0])
# field_of_studies0[0]
# field_of_studies1[0]
# degrees0[0]
# degrees1[0]
# industries[0]
# skills[0]
# str(influencer[0])
# country[0]
summaries[0]

In [27]:
summaries

[None]

In [10]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import EarlyStopping

# Load the data
data = pd.read_csv('https://raw.githubusercontent.com/fayyoz24/Bots_using_selenium/main/model_crystal/without%20certifications%20linkedin%20profiles.csv')
cols = ['position 1', 'position 2', 'field of studies 1', 'experince 1', 'experince 2', 'field of studies 2',
        'degree 1', 'degree 2', 'industry', 'skills', 'influencer', 'country']

for col in cols:
    data[col] = data[col].fillna("Unknown")

# Convert non-string values to strings in the feature columns
text_features = data[
    ['position 1', 'position 2', "experince 1", "experince 2", 'field of studies 1', 'field of studies 2',
     'degree 1', 'degree 2', 'industry', 'skills', 'influencer', 'country', 'summary']].copy()

# Handle non-string values in each column
for column in text_features.columns:
    text_features[column] = text_features[column].astype(str)

# Combine all text features into a single string column
text_data = text_features.apply(lambda x: ' '.join(x), axis=1).tolist()
labels = data['characters'].tolist()

# Convert labels to integers
label_mapping = {label: idx for idx, label in enumerate(set(labels))}
y = np.array([label_mapping[label] for label in labels])

# Update label mapping to start from 0
label_mapping = {label: idx for label, idx in label_mapping.items()}
num_classes = len(label_mapping)

# Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(text_data)
word_index = tokenizer.word_index
vocab_size = len(word_index) + 1
sequences = tokenizer.texts_to_sequences(text_data)

# Pad sequences to have consistent length
max_sequence_length = max(len(seq) for seq in sequences)
print(max_sequence_length)
padded_sequences = pad_sequences(sequences, maxlen=max_sequence_length)

# Convert the data to NumPy arrays
X_text = np.array(padded_sequences)

# Split the data into training and testing sets
X_train_text, X_test_text, y_train, y_test = train_test_split(
    X_text, y, test_size=0.1, random_state=42)

# Textual input branch
text_input = Input(shape=(max_sequence_length,))
embedding_layer = Embedding(vocab_size, 100, input_length=max_sequence_length)(text_input)
lstm_layer = LSTM(100, kernel_regularizer=l2(0.01))(embedding_layer)
dropout_layer = Dropout(0.5)(lstm_layer)  # Add a dropout layer for regularization
output_layer = Dense(num_classes, activation='softmax')(dropout_layer)

# Define the model
model = Model(inputs=text_input, outputs=output_layer)

# Compile the model
learning_rate = 0.001  # Decrease the learning
# Compile the model
learning_rate = 0.001  # Decrease the learning rate

model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Apply early stopping to prevent overfitting
early_stopping = EarlyStopping(patience=5, restore_best_weights=True)

# Train the model
history = model.fit(X_train_text, y_train,
                    validation_data=(X_test_text, y_test),
                    epochs=50,
                    batch_size=32,
                    callbacks=[early_stopping])

# Save the model
model.save('model3.h5')


523


NameError: name 'tf' is not defined

In [1]:
import datetime
import requests
import numpy as np
import json
import pandas as pd
# from tensorflow.keras.models import load_model
# from tensorflow.keras.preprocessing.text import Tokenizer
# from tensorflow.keras.preprocessing.sequence import pad_sequences
data=pd.read_csv("./partly_45878_reach_50.csv")
for i in range(982, 5000):
  link=data['links'][i]
  id = link.split('/')[4]
  url = "https://api.iscraper.io/v2/profile-details"

  payload = {
    'profile_id': id,
  }

  headers = {
    'X-API-KEY': '1MAbPt8f4U14J6XsT5WHxrTdgo1VpPAF',
  }

  response = requests.post(url, json=payload, headers=headers)
  res=response.json()

  with open(f"./datas/json_responses_50k/{i}.json", "w") as json_file:
      json.dump(res, json_file, indent=4)
  print(i)

982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076


In [25]:
import requests
import pandas as pd
import datetime
import csv
import json
positions0=[]
positions1=[]

skills=[]
field_of_studies0=[]
field_of_studies1=[]
degrees0=[]
degrees1=[]
industries=[]
summaries=[]
# model_crystal\datas\13000_profiles_without_duplicates.csv
df = pd.read_csv('../model_crystal/partly_45878_reach_50.csv')
for i in range(len(df)):
    with open(f'./datas/json_responses_50k/{i}.json', 'r') as json_file:
        res = json.load(json_file)
            
    try:
        skills.append(res['skills'])
    except:
        skills.append(None)
    try:
        position0=res['position_groups'][0]['profile_positions'][0]['title']
        print(position0)
        positions0.append(position0)
    except:
        positions0.append(None)

    try:
        position1=res['position_groups'][1]['profile_positions'][0]['title']
        positions1.append(position1)
    except:
        positions1.append(None)

    field_of_study0=""
    try:
        field_of_study0=res['education'][0]['field_of_study']
        field_of_studies0.append(field_of_study0)
    except:
        field_of_study0=None
        field_of_studies0.append(field_of_study0)

    try:
        field_of_study1=res['education'][1]['field_of_study']
        field_of_studies1.append(field_of_study1)
    except:
        field_of_study1=None
        field_of_studies1.append(field_of_study1)

    try:
        degree_name0=res['education'][0]['degree_name']
        degrees0.append(degree_name0)
    except:
        degree_name0=None
        degrees0.append(degree_name0)

    try:
        degree_name1=res['education'][1]['degree_name']
        degrees1.append(degree_name1)
    except:
        degree_name1=None
        degrees1.append(degree_name1)

    try:
        summaries.append(res['summary'])
    except:
        summaries.append(None)
    try:
        industries.append(res['industry'])
    except:
        industries.append(None)

df2=pd.DataFrame({
	'position 1':positions0,
	'position 2':positions1,
	"field of studies 1":field_of_studies0,
	"field of studies 2":field_of_studies1,
	"degree 1":degrees0,
	"degree 2":degrees1,
	"industry":industries,
	"skills":skills,
	"summary":summaries,
})
d = pd.concat([df, df2], axis=1)
d.to_csv('valuable_data_last_50k.csv')

Change Management Manager
Senior Consultant
Colorist
English Second Language Teacher
Researcher and writer on mineral resources scarcity
Senior Policy Advisor
Retired project officer UNDP in post disaster reconstruction and development
Audit Thesis Internship
Solutions Architect
Assistant Contracts Officer at MODIS for ESTEC
Projectcoördinator
Senior Researcher | Assistant professor
Senior Consultant - Strategy & Operations
Medisch directeur
Founder
Senior Advisor Multi Stakeholder Partnerships
Campaign and Outreach Coordinator
Software Engineer
Functional IT Delivery lead
Solutions Analyst - Sustainability Insights
Product Owner
International Affairs Officer
Lead Process Excellence
Director
Honours Programme Master Student
Masters Student
Founder
Assistant Professor
Lecturer
TT Assistant Professor
Water Resources Supervisor in River, Reservoir and Coastal Maintenance Division
Assistant Professor
Design Architect
Postdoctoral Researcher and Project Coordinator
Global CRM and Social Tec

In [None]:
import requests
import pandas as pd
import datetime
import csv
import json
positions0=[]
positions1=[]

skills=[]
field_of_studies0=[]
field_of_studies1=[]
degrees0=[]
degrees1=[]
industries=[]
summaries=[]
# model_crystal\datas\13000_profiles_without_duplicates.csv
with open(f'./datas/json_responses_50k/{i}.json', 'r') as json_file:
    res = json.load(json_file)
        
try:
    skills.append(res['skills'])
except:
    skills.append(None)
try:
    position0=res['position_groups'][0]['profile_positions'][0]['title']
    print(position0)
    positions0.append(position0)
except:
    positions0.append(None)

try:
    position1=res['position_groups'][1]['profile_positions'][0]['title']
    positions1.append(position1)
except:
    positions1.append(None)

field_of_study0=""
try:
    field_of_study0=res['education'][0]['field_of_study']
    field_of_studies0.append(field_of_study0)
except:
    field_of_study0=None
    field_of_studies0.append(field_of_study0)

try:
    field_of_study1=res['education'][1]['field_of_study']
    field_of_studies1.append(field_of_study1)
except:
    field_of_study1=None
    field_of_studies1.append(field_of_study1)

try:
    degree_name0=res['education'][0]['degree_name']
    degrees0.append(degree_name0)
except:
    degree_name0=None
    degrees0.append(degree_name0)

try:
    degree_name1=res['education'][1]['degree_name']
    degrees1.append(degree_name1)
except:
    degree_name1=None
    degrees1.append(degree_name1)

try:
    summaries.append(res['summary'])
except:
    summaries.append(None)
try:
    industries.append(res['industry'])
except:
    industries.append(None)

