In [26]:
import pandas as pd

In [27]:
# Список CSV-файлов для объединения
csv_files = [
    "revolut_past_key_roles.csv",
    "revolut_current_founders.csv",
    "revolut_stealth_indicators.csv"
]

combined_df = pd.DataFrame()

for file in csv_files:
    df = pd.read_csv(file)
    combined_df = pd.concat([combined_df, df], ignore_index=True)






In [28]:
combined_df = combined_df.drop_duplicates(subset='profile_id')
len(combined_df)


410

In [29]:
combined_df.columns

Index(['profile_id', 'first_name', 'last_name', 'sub_title', 'location_city',
       'location_country', 'li_url', 'skills', 'query_type'],
      dtype='object')

In [30]:
combined_df.isnull().sum()

profile_id            0
first_name            0
last_name             0
sub_title             0
location_city       142
location_country     85
li_url                0
skills               23
query_type            0
dtype: int64

In [82]:
counterS = 0
for row in combined_df.iterrows():
    if 'stealth' in row[1]['sub_title'].lower():
        print(row[1]['sub_title'])
        counterS += 1



co-Founder @ Nobly (acquired by Revolut), founder @ Stealth
Risk Management Expert @ Stealth Startup | CFE
Head of Sales Enablement EMEA/APACx at Stealth Mode Startup
Founding Software Engineer at Stealth startup, ex-Revolut, ex-Yandex
Stealth in Saudi Arabia | Building digital banks & start ups 🚀 | Ex Wio-Bank, Ex-Revolut


In [83]:
counterS

5

In [33]:
for row in combined_df.iterrows():
    for col in row[1].index:
        print(col, row[1][col])
    break

profile_id 0208allenlee
first_name Allen
last_name L.
sub_title Product & Growth Exec | Startup Advisor & Investor
location_city nan
location_country nan
li_url https://www.linkedin.com/in/0208allenlee/
skills Growth, Product, Analytics, Growth Marketing, Product Marketing, Brand Marketing, Partnerships, Business Development, International Expansion, Risk Management, Project Management, Operations, Strategy, Start-ups, Angel Investing
query_type past_roles


# удалю людей у которых роль точно не подойтдет

In [35]:
exclude_roles = [" hr ", "recruiter", "accountant", "legal", "lawyer"]

exclude_pattern = '|'.join(exclude_roles)  


filtered_df = combined_df[
    ~combined_df['sub_title'].str.contains(exclude_pattern, case=False, na=False)
]

filtered_count = len(filtered_df)

print(f"Исключено ролей: {len(combined_df) - filtered_count}")
print(f"Осталось профилей после фильтрации: {filtered_count}")

Исключено ролей: 4
Осталось профилей после фильтрации: 406


In [36]:
filtered_df.to_csv('filtered_df.csv', index=False)

In [37]:
df = pd.read_csv('filtered_df.csv')
df.columns


Index(['profile_id', 'first_name', 'last_name', 'sub_title', 'location_city',
       'location_country', 'li_url', 'skills', 'query_type'],
      dtype='object')

In [67]:
from openai import OpenAI
from typing import Dict, Any
import json
import config

client = OpenAI(api_key=config.OPENAI_API_KEY)

def llm_classifier(sub_title: str, skills: str, model: str) -> Dict[str, Any]:

    prompt = f"""
                Analyze the LinkedIn profile data to detect stealth startups and founder roles. 

                **1. Stealth Startup Indicators**

                a. Direct Evidence (any single mention is enough for `is_stealth = true`):
                    - The word "stealth" (case-insensitive) in the title (e.g., "Stealth Mode," "Stealth Startup")
                    - Terms like "undisclosed," "pre-launch," "unannounced," "confidential," "secret project"
                    - Phrases such as "in stealth" or "in stealth mode"

                b. Indirect Evidence (requires two or more of these signs to set `is_stealth = true`):
                    - No mention of a specific company or organization name (e.g., "Working on AI project" with no company name)
                    - Vague/placeholder descriptions like "New Venture," "Project X," **"Building something new," "building the future,"** 
                        "TBA/TBD project," "stay tuned," "unannounced product," or any synonyms that imply a mysterious or unrevealed project
                    - Very general claims with no real details (e.g., "Building something revolutionary," "Working on a big idea," 
                        "Something exciting coming soon," etc.)
                    - References to "hot" technologies (AI, Blockchain, Web3, Crypto, Quantum, etc.) **without** any concrete context 
                        or company details (indicating a possible stealth R&D effort)

                > **Note**: The model should interpret synonyms of these phrases that indicate a new or not-yet-disclosed project 
                > as potential stealth signals (e.g., "creating the future," "developing something undisclosed," 
                > "launching soon," etc.).

                **2. Founder Role Indicators** 
                - The following **explicit** words/phrases in the title → `is_founder = true`:
                    - "Founder," "Co-founder," "Owner"
                    - "Founding [Role]" (e.g., "Founding Engineer")
                    - References like "Built from scratch," "0 to 1," "my startup"
                - Exclusions (do not count as founder):
                    - "Ex-founder," "Former founder"
                    - "Advisor to startups," "Startup consultant," etc. (an advisory or third-party role, not an active founding member)

                **3. Input Data**:
                - `Current Position`: {sub_title}
                - `Skills`: {skills}

                **4. Output**:
                Return a concise JSON with the following structure:
                ```json
                {{
                    "is_stealth": true or false,
                    "is_founder": true or false,
                    "reason": "short explanation, e.g. 'Stealth in title' or 'No company + vague project'"
                }}
                ```
            """
    
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Analyze LinkedIn profiles to identify stealth startups and founder roles."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=100,
            response_format={"type": "json_object"}  
        )
        
        result = json.loads(response.choices[0].message.content)
        return result
        
    except Exception as e:
        print(f"Ошибка при запросе к OpenAI: {e}")
        return {
            "is_stealth": False,
            "is_founder": False,
            "reason": "API Error"
        }

In [68]:
test_subtitle = "working on new things"
test_skills = "Python, Startups, Product Development"
result = llm_classifier(test_subtitle, test_skills, config.gpt_4o)
print(type(result))
print(result)

<class 'dict'>
{'is_stealth': True, 'is_founder': False, 'reason': 'No company + vague project'}


In [69]:
df['is_stealth'] = False  
df['is_founder'] = False  
df['stealth_reason'] = "" 

In [71]:
for idx, row in df.iterrows():
    try:
        result = llm_classifier(row['sub_title'], row['skills'], config.gpt_4o)
    except Exception as e:
        print(f"Ошибка при запросе к OpenAI: {e}")
        result = {
            "is_stealth": False,
            "is_founder": False,
            "reason": "API Error"
        }
    df.at[idx, 'is_stealth'] = result['is_stealth']
    df.at[idx, 'is_founder'] = result['is_founder']
    df.at[idx, 'stealth_reason'] = result['reason']

In [85]:
df['is_stealth'].value_counts()

is_stealth
False    366
True      40
Name: count, dtype: int64

In [86]:
df['is_founder'].value_counts()

is_founder
False    337
True      69
Name: count, dtype: int64

In [88]:
df_true = df[(df['is_stealth'] == True) | (df['is_founder'] == True)]

105

In [98]:
spec_comp = pd.read_csv('revolut_specific_companies.csv')
len(spec_comp)

33

In [93]:
df_true.columns

Index(['profile_id', 'first_name', 'last_name', 'sub_title', 'location_city',
       'location_country', 'li_url', 'skills', 'query_type', 'is_stealth',
       'is_founder', 'stealth_reason'],
      dtype='object')

# соеденю два датасета(один тот что после классификации с ллм, другой с теми у кого в профиле написано stealth startup)

In [99]:
df_tu_final_analysis = pd.concat([
    df_true[['profile_id', 'first_name', 'last_name', 'sub_title', 'location_city',
       'location_country', 'li_url', 'skills']],
    spec_comp[['profile_id', 'first_name', 'last_name', 'sub_title', 'location_city',
       'location_country', 'li_url', 'skills']]
], ignore_index=True)


In [102]:
len(df_tu_final_analysis), len(df_tu_final_analysis.drop_duplicates(subset='profile_id'))

(138, 124)

In [103]:
df_tu_final_analysis= df_tu_final_analysis.drop_duplicates(subset='profile_id')

In [106]:
df_tu_final_analysis.to_csv('df_tu_final_analysis.csv', index=False)

In [136]:
for row in df_tu_final_analysis.iterrows():
    print(row[1]['profile_id'])
    break


marc-o-brien-1017811


# запрос к проапи посмотреть профиль

In [132]:
import requests

url = "https://api.proapis.com/iscraper/v4/profile-details"

headers = {
    "Content-Type": "application/json",
    "X-Api-Key": config.PRO_API_KEY
}

payload = {
    "profile_id": "alexander-bratz"  # 
}

response = requests.post(url, headers=headers, json=payload)

print(response.json())

{'profile_id': 'alexander-bratz', 'first_name': 'Alexander', 'last_name': 'Bratz', 'sub_title': 'Founder & FinTechie | Ex-Revolut | Ex-N26 |', 'profile_picture': 'https://media.licdn.com/dms/image/v2/C4E03AQFdZbLs4vOLLA/profile-displayphoto-shrink_800_800/profile-displayphoto-shrink_800_800/0/1637056623072?e=1749081600&v=beta&t=lt2vbAfMktDs4pjRUAtLR5ul9ffhtm-Eblm3mfEi23c', 'background_image': None, 'profile_type': 'personal', 'open_to_work': True, 'entity_urn': 'ACoAAAxPoOoB5B5xD7W1S5smLNw-nBgZEv0UkZY', 'object_urn': 206545130, 'birth_date': None, 'summary': 'We provide a single trusted platform solution to solve your B2B cross-border payment needs in over 180 countries and 37 currencies. Get in touch!', 'location': {'country': 'Germany', 'short': 'Constance, Baden-Württemberg', 'city': 'Constance', 'state': 'Baden-Württemberg', 'default': 'Constance, Baden-Württemberg, Germany'}, 'premium': False, 'influencer': False, 'treasury_media': [], 'languages': {'primary_locale': {'country': '

In [133]:
with open('alexander-bratz.json', 'w') as f:
    json.dump(response.json(), f, indent=4)

In [114]:
type(response.json())

dict

In [116]:
d = response.json()
d.keys()

dict_keys(['profile_id', 'first_name', 'last_name', 'sub_title', 'profile_picture', 'background_image', 'profile_type', 'open_to_work', 'entity_urn', 'object_urn', 'birth_date', 'summary', 'location', 'premium', 'influencer', 'treasury_media', 'languages', 'industry', 'education', 'patents', 'awards', 'certifications', 'organizations', 'projects', 'publications', 'courses', 'test_scores', 'position_groups', 'volunteer_experiences', 'skills', 'network_info', 'related_profiles', 'contact_info', 'verifications_info'])

# просто посмотреть структуру json от ответа к профилю

In [131]:
d['position_groups']
for i in d['position_groups']:
    if 'stealth' in i['company']['name'].lower():
        print(i.keys())
        print(type(i['profile_positions']))
        print(i['profile_positions'][0].keys())
        descr = i['profile_positions'][0]['description']
        title = i['profile_positions'][0]['title']
        print(descr, title)
        break


dict_keys(['company', 'date', 'profile_positions'])
<class 'list'>
dict_keys(['location', 'date', 'company', 'description', 'title', 'employment_type'])
The future of e-commerce 🔜 Founder


# пройдусь еще раз ллм что бы найти тех у кого в био у казано что он работает где то конкретно

In [140]:
for sub_title in df_tu_final_analysis['sub_title']:
    print(sub_title)
    


Founder & CEO at Trstpays
Co-Founder & CPO at DolarApp
Founder | Building Cloud 99
Owner and CEO of AaHbit, the best gluten free and refined sugar free confectionery in Japan
Building something new | ex-Revolut, Lyft, YC S20
Building something new | Z-Fellow | ex-Yahoo, ex-Revolut
Something New in Crypto (Ex.Revolut, Goldman Sachs)
Founder
Something new coming soon
Senior Product Manager @ KOMI | ex-Spotify & Revolut | Former Founder and 30U30
The Independent candidate for Limerick Mayor Élection
CEO and Founder at RHH International
Founder & FinTechie | Ex-Revolut | Ex-N26 |
Founder at TravelGenie | Ex-Revolut, UBS
building for merchants across APAC
CEO & Co-Founder
Growth & Innovation Advisor | ex Amazon, Revolut, VC | Impact Founder
Building something new | ex-Revolut
Founder @ Empowr Health
Co-founder at Givver | HRTech | Payroll | Fintech | Startup Advisor | 🏳️‍🌈
Working on something new in the cybersecurity space
Safi CEO - We‘re hiring
Co-founder @paradime.io | I'm hiring 🤘
Co-F

In [146]:
df_tu_final_analysis.iloc[23]['li_url']

'https://www.linkedin.com/in/dave-johns/'

In [152]:
from openai import OpenAI
from typing import Dict, Any
import json
import config

client = OpenAI(api_key=config.OPENAI_API_KEY)

def company_name_classifier(sub_title: str, model: str) -> Dict[str, Any]:

    prompt = f"""
                Analyze the LinkedIn profile title and determine if it contains a CURRENT company name (True/False).

                Rules for identifying current company names:
                1. Company name should be a specific organization name, not an industry or activity description
                2. Current company names often appear after "@", "at", "in", or similar prepositions
                3. If all companies are prefixed with "ex-", "former", or similar, then there is no current company
                4. Generic descriptions like "stealth", "new venture", "something new", "crypto project" are NOT company names
                5. The company name should be for current employment (not past)

                Examples:

                FALSE cases (no current company mentioned):
                - "Building something new | ex-Google" (only past company)
                - "Something new coming soon" (no company name)
                - "Building something new | ex-Revolut, Lyft, YC S20" (only past companies)
                - "Building something new | Z-Fellow | ex-Yahoo, ex-Revolut" (no current company)
                - "Something New in Crypto (Ex.Revolut, Goldman Sachs)" (industry mention, not company name)
                - "Founder & CEO of Stealth Startup" (generic, not a specific company)
                - "Building the future of fintech" (activity description, not company)
                - "Entrepreneur in Residence" (role without company)

                TRUE cases (current company mentioned):
                - "Senior Product Manager @ KOMI | ex-Spotify & Revolut" (KOMI is current)
                - "Chief of Staff @ Simple App | ex-Revolut" (Simple App is current)
                - "Engineering Lead at Monzo Bank" (Monzo Bank is current)
                - "Product Manager @ N26 | Previously Revolut" (N26 is current)
                - "CEO of TechCorp | ex-Google" (TechCorp is current)

                Input title: "{sub_title}"

                Return JSON format:
                {{
                    "has_current_company": boolean,
                    "reason": "explanation of decision in 5-6 words"
                }}
                """
    
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": "Analyze LinkedIn profiles to identify is there current company name or not."},
                {"role": "user", "content": prompt}
            ],
            temperature=0.3,
            max_tokens=100,
            response_format={"type": "json_object"}  
        )
        
        result = json.loads(response.choices[0].message.content)
        return result
        
    except Exception as e:
        print(f"Ошибка при запросе к OpenAI: {e}")
        return {
            "has_current_company": False,
            "reason": "API Error"
        }

In [153]:
test_subtitle = "working on new things"
test_skills = "Python, Startups, Product Development"
result = company_name_classifier(test_subtitle, config.gpt_4o)
print(type(result))
print(result)

<class 'dict'>
{'has_current_company': False, 'reason': 'No specific company mentioned'}


In [154]:
from tqdm import tqdm

# Инициализируем новые колонки
df_tu_final_analysis['has_current_company'] = False
df_tu_final_analysis['current_company_reason'] = ""

# Используем tqdm для отображения прогресса
for idx, row in tqdm(df_tu_final_analysis.iterrows(), total=len(df_tu_final_analysis), desc="Анализ профилей"):
    try:
        result = company_name_classifier(row['sub_title'], config.gpt_4o)
        
        # Записываем результаты в DataFrame
        df_tu_final_analysis.at[idx, 'has_current_company'] = result['has_current_company']
        df_tu_final_analysis.at[idx, 'current_company_reason'] = result['reason']
        
    except Exception as e:
        print(f"Ошибка при обработке строки {idx}: {e}")
        df_tu_final_analysis.at[idx, 'current_company_reason'] = f"Error: {str(e)}"


Анализ профилей: 100%|██████████| 124/124 [01:46<00:00,  1.17it/s]


In [175]:
df_tu_final_analysis.value_counts('has_current_company')

has_current_company
False    68
True     56
Name: count, dtype: int64

In [176]:
df_tu_final_classification = df_tu_final_analysis[df_tu_final_analysis['has_current_company'] == False]



In [177]:
df_tu_final_classification.to_csv('df_tu_final_classification.csv', index=False)

In [179]:
df_tu_final_classification.tail(10)

Unnamed: 0,profile_id,first_name,last_name,sub_title,location_city,location_country,li_url,skills,has_current_company,current_company_reason
113,maximshestopalov,Maxim,Shestopalov,Ex-Revolut,,united arab emirates,https://www.linkedin.com/in/maximshestopalov/,"IT Recruitment, Interviewing, Sourcing, Techni...",False,Only past company mentioned
118,jeanna-m,Jeanna,M.,SaaS | Growth Strategist,,united arab emirates,https://www.linkedin.com/in/jeanna-m/,"Communication, Branding, Illustration, Adobe C...",False,No specific company mentioned
120,iamruslana,Ruslana,N.,CBDO | Inspired by Technology | Global Brand |...,amsterdam,netherlands,https://www.linkedin.com/in/iamruslana/,"Working with Investors, Sales Management, Busi...",False,No specific company name mentioned
121,mattialandolfi,Mattia,Landolfi,---,cracow,poland,https://www.linkedin.com/in/mattialandolfi/,"Team Leadership, SQL, Problem Solving, Data An...",False,no specific current company name
123,eduard-t-43589244,Eduard,T.,Strategic Communications | Forbes 30 under 30 ...,brussels,belgium,https://www.linkedin.com/in/eduard-t-43589244/,"Crisis Communications, Strategic Communication...",False,only past company mentioned
127,tej-patel01,Tej,Patel,Cleantech | Ex Revolut | Ex McKinsey,,,https://www.linkedin.com/in/tej-patel01/,"Aerospace, Manufacturing, Root Cause Analysis,...",False,Only past companies mentioned
128,nour-kai-056506268,Nour,Kai,Finance & Tech,,,https://www.linkedin.com/in/nour-kai-056506268/,,False,No specific company mentioned
129,shikha-garg-641460149,Shikha,Garg,Full Stack Developer with 6+ years of experien...,,,https://www.linkedin.com/in/shikha-garg-641460...,,False,No specific current company mentioned
136,alinyaaraujo,Aliny,P.,Lead Operations Manager | Designing processes ...,st albans,united kingdom,https://www.linkedin.com/in/alinyaaraujo/,"Strategic Planning, Process Improvement, Proje...",False,No specific company name mentioned
137,anirudhmanchiraju,Anirudh,Manchiraju,Product Strategy & Operations | Stanford | Duk...,,,https://www.linkedin.com/in/anirudhmanchiraju/,"Data Analysis, Strategy, Project Management, B...",False,No specific current company name


In [180]:
import requests

url = "https://api.proapis.com/iscraper/v4/profile-details"

headers = {
    "Content-Type": "application/json",
    "X-Api-Key": config.PRO_API_KEY
}

payload = {
    "profile_id": "maximshestopalov"   
}

response = requests.post(url, headers=headers, json=payload)

with open('maximshestopalov.json', 'w') as f:
    json.dump(response.json(), f, indent=4)


In [182]:
import json
from typing import Dict, Any

def format_date(date_dict: Dict) -> str:
    if not date_dict or not date_dict.get('year'):
        return None
    
    month = str(date_dict.get('month', 1)).zfill(2)
    year = str(date_dict['year'])
    return f"{year}-{month}"

def extract_profile_info(file_path: str) -> Dict[str, Any]:

    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            json_data = json.load(file)
        
        profile_info = {
            'api_sub_title': json_data['sub_title'],
            'current_position': None
        }
        
        
        if 'position_groups' in json_data and json_data['position_groups']:
            current_group = json_data['position_groups'][0]
            if current_group['profile_positions']:
                current_position = current_group['profile_positions'][0]
                profile_info['current_position'] = {
                    'company': current_group['company']['name'],
                    'title': current_position['title'],
                    'start_date': format_date(current_position['date']['start']),
                    'employment_type': current_position['employment_type'],
                    'location': current_position.get('location', 'N/A')
                }
        
        return profile_info
    
    except Exception as e:
        print(f"Ошибка при обработке файла {file_path}: {str(e)}")
        return None


file_path = "maximshestopalov.json"
profile_data = extract_profile_info(file_path)

if profile_data:
    # заметил что он иногда отличается от того что в линкед ин
    print("\nЗаголовок из API:", profile_data['api_sub_title'])
    
    if profile_data['current_position']:
        current = profile_data['current_position']
        print("\nТекущая позиция:")
        print(f"Компания: {current['company']}")
        print(f"Должность: {current['title']}")
        print(f"Начало работы: {current['start_date']}")
        print(f"Тип занятости: {current['employment_type']}")
        print(f"Локация: {current['location']}")


Заголовок из API: Ex-Revolut

Текущая позиция:
Компания: Stealth Startup
Должность: Operations Manager
Начало работы: 2024-01
Тип занятости: Part-time
Локация: Dubai, United Arab Emirates


# теперь пройду по датасету и сделаю в цикле запросы к апи

In [185]:
import requests
import json
import pandas as pd
from tqdm import tqdm
import os
from typing import Dict, Any
import time

def format_date(date_dict: Dict) -> str:
    if not date_dict or not date_dict.get('year'):
        return None
    
    month = str(date_dict.get('month', 1)).zfill(2)
    year = str(date_dict['year'])
    return f"{year}-{month}"

def extract_profile_info(json_data: Dict) -> Dict[str, Any]:
    try:
        profile_info = {
            'api_sub_title': json_data['sub_title'],
            'current_position': None
        }
        
        if 'position_groups' in json_data and json_data['position_groups']:
            current_group = json_data['position_groups'][0]
            if current_group['profile_positions']:
                current_position = current_group['profile_positions'][0]
                profile_info['current_position'] = {
                    'company': current_group['company']['name'],
                    'title': current_position['title'],
                    'start_date': format_date(current_position['date']['start']),
                    'employment_type': current_position['employment_type'],
                    'location': current_position.get('location', 'N/A')
                }
        
        return profile_info
    
    except Exception as e:
        print(f"Ошибка при обработке данных: {str(e)}")
        return None

def process_profiles(df_input: pd.DataFrame):
    os.makedirs('final_request_to_api', exist_ok=True)
    
    new_data = []
    
    
    url = "https://api.proapis.com/iscraper/v4/profile-details"
    headers = {
        "Content-Type": "application/json",
        "X-Api-Key": config.PRO_API_KEY
    }
    
    
    for _, row in tqdm(df_input.iterrows(), total=len(df_input), desc="Обработка профилей"):
        try:
            
            payload = {"profile_id": row['profile_id']}
            response = requests.post(url, headers=headers, json=payload)
            
            if response.status_code == 200:
                json_data = response.json()
                json_file_path = f"final_request_to_api/{row['profile_id']}.json"
                with open(json_file_path, 'w', encoding='utf-8') as f:
                    json.dump(json_data, f, indent=4, ensure_ascii=False)
                
                profile_info = extract_profile_info(json_data)
                
                if profile_info:
                    profile_data = {
                        'profile_id': row['profile_id'],
                        'first_name': row['first_name'],
                        'last_name': row['last_name'],
                        'linkedin_sub_title': row['sub_title'],
                        'li_url': row['li_url'],
                        'api_sub_title': profile_info['api_sub_title']
                    }
                    
                    if profile_info['current_position']:
                        current = profile_info['current_position']
                        profile_data.update({
                            'current_company': current['company'],
                            'current_title': current['title'],
                            'start_date': current['start_date'],
                            'employment_type': current['employment_type'],
                            'location': current['location']
                        })
                    else:
                        profile_data.update({
                            'current_company': None,
                            'current_title': None,
                            'start_date': None,
                            'employment_type': None,
                            'location': None
                        })
                    
                    new_data.append(profile_data)
            
            else:
                print(f"Ошибка API для {row['profile_id']}: {response.status_code}")
            
            time.sleep(1)
            
        except Exception as e:
            print(f"Ошибка при обработке профиля {row['profile_id']}: {str(e)}")
    
    new_df = pd.DataFrame(new_data)
    new_df.to_csv('profiles_with_current_positions.csv', index=False)
    print("\nДанные сохранены в 'profiles_with_current_positions.csv'")
    
    return new_df

result_df = process_profiles(df_tu_final_classification)

Обработка профилей:  24%|██▎       | 16/68 [00:43<02:19,  2.68s/it]

Ошибка API для íñigo-goizueta-61a614162: 404


Обработка профилей:  57%|█████▋    | 39/68 [01:43<01:16,  2.64s/it]

Ошибка API для david-kfm: 404


Обработка профилей:  87%|████████▋ | 59/68 [02:32<00:19,  2.13s/it]

Ошибка API для jeanna-m: 404


Обработка профилей:  88%|████████▊ | 60/68 [02:34<00:17,  2.17s/it]

Ошибка API для iamruslana: 404


Обработка профилей:  94%|█████████▍| 64/68 [02:44<00:09,  2.43s/it]

Ошибка API для nour-kai-056506268: 404


Обработка профилей: 100%|██████████| 68/68 [02:54<00:00,  2.57s/it]


Данные сохранены в 'profiles_with_current_positions.csv'





In [192]:
df = pd.read_csv('profiles_with_current_positions.csv')


63

In [198]:

rows_to_add = []  # Список для хранения строк

for idx, row in df.iterrows():
    try:
        if 'stealth' in row['current_company'].lower():
            rows_to_add.append(row)  # Добавляем строку в список
    except Exception as e:
        print(f"Ошибка при обработке строки {idx}: {e}")

# Создаем новый DataFrame из списка
new_df = pd.DataFrame(rows_to_add)

Ошибка при обработке строки 20: 'float' object has no attribute 'lower'


In [202]:
new_df.to_csv('founders_in_stealth_companies.csv', index=False)
    
