## Bu çalışmada fonksiyonel programlamaya odaklanılmıştır.

# GENEL BİLGİLER
### Kolon Bilgisi
#### 11 kolon vardır.
###### 1. work_year : Çalışılan yıl
###### 2. experience_level: Deneyim düzeyi
- EN: Entry-level / Junior (Giriş seviyesi / Yeni başlayan)
- MI: Mid-level / Intermediate (Orta seviye)
- SE: Senior-level / Experienced (Kıdemli / Deneyimli)
- EX: Executive-level / Director (Yönetici / Direktör)

###### 3. employment_type: Sözleşme türü(tam zamanlı gibi)
- FT: Full-time (Tam zamanlı)
- PT: Part-time (Yarı zamanlı)
- CT: Contract (Sözleşmeli)
- FL: Freelance (Serbest)
###### 4. job_title: Çalışanın görevi/işi
###### 5. salary: Maaş
###### 6. salary_currency: Maaşın para cinsi
###### 7. salary_in_usd: Maaşın dolar karşılığı
###### 8. employee_residence: Çalışanın konumu
###### 9. remote_ratio: Çalışanın ne kadar uzaktan çalıştığının oranı
- 0: Hiç uzaktan çalışma yok
- 50: Kısmen uzaktan çalışma, %50 uzaktan çalışma
- 100: Tamamen uzaktan çalışma, %100 uzaktan çalışma
###### 10. company_location: Şirketin konumu
###### 11. company_size: Şirketin büyüklüğü
- S: Small (Küçük) - 1 ila 50 çalışan
- M: Medium (Orta) - 51 ila 250 çalışan
- L: Large (Büyük) - 251 ve üzeri çalışan

In [1]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import OrdinalEncoder
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score 

In [2]:
df = pd.read_csv("Dataset salary 2024.csv")
df.head()

Unnamed: 0,work_year,experience_level,employment_type,job_title,salary,salary_currency,salary_in_usd,employee_residence,remote_ratio,company_location,company_size
0,2024,SE,FT,AI Engineer,202730,USD,202730,US,0,US,M
1,2024,SE,FT,AI Engineer,92118,USD,92118,US,0,US,M
2,2024,SE,FT,Data Engineer,130500,USD,130500,US,0,US,M
3,2024,SE,FT,Data Engineer,96000,USD,96000,US,0,US,M
4,2024,SE,FT,Machine Learning Engineer,190000,USD,190000,US,0,US,M


In [3]:
# Veri seti hakkındaki genel bilgileri veren fonksiyon
def info(data_set):
    print("------------- shape -------------")
    print(data_set.shape)
    print("------------- index -------------")
    print(data_set.index)
    print("------------- columns -------------")
    print(data_set.columns)
    print("------------- info -------------")
    print(data_set.info())

In [4]:
info(df)

------------- shape -------------
(16534, 11)
------------- index -------------
RangeIndex(start=0, stop=16534, step=1)
------------- columns -------------
Index(['work_year', 'experience_level', 'employment_type', 'job_title',
       'salary', 'salary_currency', 'salary_in_usd', 'employee_residence',
       'remote_ratio', 'company_location', 'company_size'],
      dtype='object')
------------- info -------------
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 16534 entries, 0 to 16533
Data columns (total 11 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   work_year           16534 non-null  int64 
 1   experience_level    16534 non-null  object
 2   employment_type     16534 non-null  object
 3   job_title           16534 non-null  object
 4   salary              16534 non-null  int64 
 5   salary_currency     16534 non-null  object
 6   salary_in_usd       16534 non-null  int64 
 7   employee_residence  16534 non-null  

In [5]:
# Veri setindeki kolonların tipini veren fonksiyon. Belirli bir kolon adı da verilebilir.
def types(data_set, col_name = None):
    if col_name == None:
        return data_set.dtypes
    else:
        return data_set[col_name].dtypes

In [6]:
types(df)

work_year              int64
experience_level      object
employment_type       object
job_title             object
salary                 int64
salary_currency       object
salary_in_usd          int64
employee_residence    object
remote_ratio           int64
company_location      object
company_size          object
dtype: object

In [7]:
# Kolonlardaki benzersiz değerleri veren fonksiyon.
def unique(data_set, col_name = None):
    if col_name == None:
        unique_values = {}
        for col in data_set:
            unique_values[col] = data_set[col].unique()
        return unique_values
    else:
        return data_set[col_name].unique()

In [8]:
unique(df, "remote_ratio")

array([  0, 100,  50], dtype=int64)

In [9]:
# İstatistiksel bilgileri veren fonksiyon
def desc(data_set, col_name = None):
    if col_name == None:
        return data_set.describe()
    else:
        return data_set[col_name].describe()

In [10]:
desc(df)

Unnamed: 0,work_year,salary,salary_in_usd,remote_ratio
count,16534.0,16534.0,16534.0,16534.0
mean,2023.226866,163727.0,149686.777973,32.00375
std,0.713558,340205.7,68505.293156,46.245158
min,2020.0,14000.0,15000.0,0.0
25%,2023.0,101763.0,101125.0,0.0
50%,2023.0,142200.0,141300.0,0.0
75%,2024.0,187200.0,185900.0,100.0
max,2024.0,30400000.0,800000.0,100.0


In [11]:
# Veri setindeki verilen kolonu kaldıran fonksiyon.
def remove_col(data_set, col_name):
    data_set.drop(col_name, axis = 1,inplace = True)

In [12]:
remove_col(df, "salary")

In [13]:
remove_col(df, "salary_currency")

job_title, employee_residence ve company_location değişkenleri içerisinde fazla değer barındırdığı ve sayısal veriye one hot encoder ile dönüştürüleceğinden veri setinde çok fazla kolon oluşaçak. Bu fazla değişken sayıları gruplanarak azaltılmıştır.

In [14]:
new_job_title = {
    "Data Science": [
        "Data Science",
        "Data Modeler", "Data Specialist", "Data Modeller",
        "Data Scientist", "Applied Scientist", "Research Scientist", 
        "Data Science Manager", "Data Science Consultant", "Lead Data Scientist",
        "Data Science Analyst", "Data Science Practitioner", "Applied Data Scientist",
        "Data Science Lead", "Director of Data Science", "Managing Director Data Science",
        "Marketing Data Scientist", "Data Science Tech Lead", "Data Scientist Lead",
        "Data Science Engineer", "Data Science Director", "Head of Data Science"
    ],
    "Machine Learning": [
        "Research Engineer","Decision Scientist","Marketing Data Engineer",
        "AI Engineer", "Machine Learning Engineer", "ML Engineer", "NLP Engineer",
        "AI Software Engineer", "Machine Learning Scientist", "AI Research Scientist",
        "AI Scientist", "Machine Learning Developer", "Machine Learning Researcher",
        "Machine Learning Research Engineer", "AI Research Engineer", "AI Programmer",
        "AI Developer", "Computer Vision Engineer", "Deep Learning Engineer",
        "Machine Learning Modeler", "Machine Learning Manager", "Applied Machine Learning Scientist",
        "Principal Machine Learning Engineer", "Staff Machine Learning Engineer",
        "Machine Learning Specialist", "Applied Machine Learning Engineer",
        "Deep Learning Researcher", "Computer Vision Software Engineer", "AI Architect",
        "Head of Machine Learning", "AI Product Manager", "Machine Learning Operations Engineer",
        "MLOps Engineer", "Machine Learning Infrastructure Engineer", "ML Ops Engineer",
        "Machine Learning Software Engineer", "Lead Machine Learning Engineer",
        "Machine Learning Tech Lead"
    ],
    "Data Engineering": [
        "ETL Developer", "Data Integration Developer", "Analytics Engineering Manager",
        "Data Engineer", "Data Integration Engineer", "Data Pipeline Engineer", "Lead Data Engineer",
        "Principal Data Engineer", "Software Data Engineer", "Cloud Data Engineer",
        "Data Infrastructure Engineer", "BI Data Engineer", "Data DevOps Engineer",
        "Big Data Engineer", "Data Operations Engineer", "ETL Engineer", "AWS Data Architect",
        "Cloud Database Engineer", "Big Data Developer", "Cloud Data Architect",
        "Big Data Architect", "Lead Data Engineer"
    ],
    "Business Intelligence": [
        "Business Intelligence","Data Specialist",
        "Business Intelligence Engineer", "Business Intelligence Analyst", "BI Developer",
        "Business Intelligence Manager", "Business Intelligence Specialist",
        "Director of Business Intelligence", "Business Intelligence Lead", "BI Analyst",
        "BI Data Analyst", "Business Intelligence Developer", "Business Intelligence Data Analyst",
        "Power BI Developer", "Head of Business Intelligence"
    ],
    "Analytics": [
        "Data Integration Specialist ", "Lead Data Analyst",
        "Data Analyst", "Research Analyst", "Analytics Engineer", "Data Analytics Specialist",
        "Data Analytics Lead", "Data Analytics Associate", "Data Reporting Analyst",
        "Data Analytics Consultant", "Financial Data Analyst", "Marketing Data Analyst",
        "Data Analytics Engineer", "Data Analytics Manager", "Quantitative Research Analyst",
        "Data Strategy Manager", "Data Analyst Lead", "Business Data Analyst",
        "Sales Data Analyst", "Compliance Data Analyst", "Finance Data Analyst"
    ],
    "Data Management": [
        "Data Visualization Analyst", "Data Specialist",
        "Data Manager", "Data Architect", "Data Operations Manager", "Data Science Manager",
        "Data Management Analyst", "Data Management Consultant", "Data Management Specialist",
        "Manager Data Management", "Data Quality Manager", "Data Operations Specialist",
        "Data Product Manager", "Data Operations Analyst", "Data Product Owner",
        "Head of Data", "Data Strategist", "Data Quality Engineer", "Data Quality Analyst",
        "Data Management Director", "Data Lead", "Data Operations Associate"
    ],
    "Other Related Roles": [
        "Encounter Data Management Professional", "Bear Robotics", "Decision Scientist", "Autonomous Vehicle Technician",
        "Prompt Engineer", "CRM Data Analyst", "Encounter Data Management Professional",
        "Insight Analyst", "Consultant Data Engineer", "Admin & Data Analyst",
        "Data Visualization Specialist", "Data Integration Specialist", "Robotics Engineer",
        "Autonomous Vehicle Technician", "Robotics Software Engineer", "Bear Robotics",
        "Principal Data Scientist", "Principal Data Architect", "Principal Data Analyst",
        "Product Data Analyst", "Data Visualization Engineer", "Azure Data Engineer",
        "Staff Data Analyst", "Principal Data Engineer", "Staff Data Scientist",
        "Applied Research Scientist", "Applied Scientist", "Computational Biologist",
        "Deep Learning Engineer", "Data Developer", "BI Developer"
    ]
}

In [15]:
new_job_title_map = {}
for new_job_title, jobs in new_job_title.items():
    for job in jobs:
        new_job_title_map[job] = new_job_title
        
df['new_job_title'] = df['job_title'].map(new_job_title_map)
remove_col(df, "job_title")

employee_residence GRUPLAMA

In [16]:
new_employee_residence = {
    "North America": ["US", "CA", "MX"],
    
    "South America":["CL", "AR", "BR", "BO", "CO", "EC", "PY", "PE", "UY"],
    
    "Europe":["JE","SE","SI","CZ","NO","MD","BA","FI","ES","GB", "NL", "LT", "DK", "FR", "LV", "GE", "DE", "AT", "HR", "CH", "GR", "PL", "UA", "PT", "IT", "IE", "EE", "MT", "RO", "HU", "BE", "AD", "CY", "BG", "RS", "LU"],
    
    "Africa":["CF","MU","DZ","TN", "ZA", "KE", "NG", "UG", "GH", "EG", "MR", "SC", "TZ", "SN"],

    "Asia":["IQ","MY","AM","UZ","VN","TR","IN" ,"IL", "LB", "AE", "SA", "KG", "KZ", "AZ", "KR", "QA", "RU", "CN", "JP", "HK", "SG", "TH", "ID", "PH", "PK", "LK", "IR", "KW", "BH", "YE", "OM"],
    
    "Oceania":["AS","AU", "NZ", "PG", "FJ",],
    
    "Caribbean":["HN","CR","PR","DO", "HT", "JM", "BB","TT", "AG", "LC", "VC"]
}

In [17]:
new_employee_residence_map = {}
for new_employee_residence, residence in new_employee_residence.items():
    for r in residence:
        new_employee_residence_map[r] = new_employee_residence
        
df['new_employee_residence'] = df['employee_residence'].map(new_employee_residence_map )
remove_col(df, "employee_residence")

company_location GRUPLAMA

In [18]:
new_company_location = {
    "North America": ["US", "CA", "MX"],
    
    "South America":["CL", "AR", "BR", "BO", "CO", "EC", "PY", "PE", "UY"],
    
    "Europe":["GI", "JE","SE","SI","CZ","NO","MD","BA","FI","ES","GB", "NL", "LT", "DK", "FR", "LV", "GE", "DE", "AT", "HR", "CH", "GR", "PL", "UA", "PT", "IT", "IE", "EE", "MT", "RO", "HU", "BE", "AD", "CY", "BG", "RS", "LU"],
    
    "Africa":["CF","MU","DZ","TN", "ZA", "KE", "NG", "UG", "GH", "EG", "MR", "SC", "TZ", "SN"],

    "Asia":["IQ","MY","AM","UZ","VN","TR","IN" ,"IL", "LB", "AE", "SA", "KG", "KZ", "AZ", "KR", "QA", "RU", "CN", "JP", "HK", "SG", "TH", "ID", "PH", "PK", "LK", "IR", "KW", "BH", "YE", "OM"],
    
    "Oceania":["AS","AU", "NZ", "PG", "FJ",],
    
    "Caribbean":["BS","HN","CR","PR","DO", "HT", "JM", "BB","TT", "AG", "LC", "VC"]
}

In [19]:
new_company_location_map = {}
for new_company_location, residence in new_company_location.items():
    for r in residence:
        new_company_location_map[r] = new_company_location
        
df['new_company_location'] = df['company_location'].map(new_company_location_map )
remove_col(df, "company_location")

In [20]:
df

Unnamed: 0,work_year,experience_level,employment_type,salary_in_usd,remote_ratio,company_size,new_job_title,new_employee_residence,new_company_location
0,2024,SE,FT,202730,0,M,Machine Learning,North America,North America
1,2024,SE,FT,92118,0,M,Machine Learning,North America,North America
2,2024,SE,FT,130500,0,M,Data Engineering,North America,North America
3,2024,SE,FT,96000,0,M,Data Engineering,North America,North America
4,2024,SE,FT,190000,0,M,Machine Learning,North America,North America
...,...,...,...,...,...,...,...,...,...
16529,2020,SE,FT,412000,100,L,Data Science,North America,North America
16530,2021,MI,FT,151000,100,L,Other Related Roles,North America,North America
16531,2020,EN,FT,105000,100,S,Data Science,North America,North America
16532,2020,EN,CT,100000,100,L,Analytics,North America,North America


In [21]:
# Aykırı değerleri veren fonksiyon.
def outliers(data_set, col_name, q1 = 0.25, q3 = 0.75):
    quantile1 = data_set[col_name].quantile(q1)
    quantile3 = data_set[col_name].quantile(q3)
    
    iqr = quantile3 - quantile1
    
    up_limit = quantile3 + 1.5 * iqr
    low_limit = quantile1 - 1.5 * iqr
    return low_limit, up_limit

In [22]:
outliers(df, "salary_in_usd")

(-26037.5, 313062.5)

In [23]:
# Aykırı değerlerin indeksini veren fonksiyon.
def outliers_index(data_set, col_name):
    low_limit, up_limit = outliers(data_set, col_name)
    return data_set.loc[(data_set[col_name] < low_limit) | (data_set[col_name] > up_limit), col_name]

In [24]:
outliers_index(df, "salary_in_usd")

6        400000
309      385000
385      370000
393      720000
629      500000
          ...  
16316    450000
16461    416000
16480    325000
16526    423000
16529    412000
Name: salary_in_usd, Length: 283, dtype: int64

In [25]:
# Aykırı değerleri baskılama yöntemi ile çözen fonksiyon.
def outliers_change(data_set, col_name):
    low_limit, up_limit = outliers(data_set, col_name)
    data_set.loc[(data_set[col_name] < low_limit), col_name] = low_limit
    data_set.loc[(data_set[col_name] > up_limit), col_name] = up_limit

In [26]:
outliers_change(df, "salary_in_usd")

In [27]:
# Korelasyon bilgilerini veren fonksiyon.
def correlation(data_set):
    return data_set.corr(numeric_only=True)

In [28]:
correlation(df)

Unnamed: 0,work_year,salary_in_usd,remote_ratio
work_year,1.0,0.074763,-0.20006
salary_in_usd,0.074763,1.0,-0.05616
remote_ratio,-0.20006,-0.05616,1.0


In [29]:
# Label encoding işlemini gerçekleştiren fonksiyon.
def label_encoding(data_set, col_names):
    le = LabelEncoder()
    for col_name in col_names:
        data_set[col_name] = le.fit_transform(data_set[col_name])
    return data_set

In [30]:
label_encoding(df, ["employment_type", "new_employee_residence", "new_job_title", "new_company_location"])

Unnamed: 0,work_year,experience_level,employment_type,salary_in_usd,remote_ratio,company_size,new_job_title,new_employee_residence,new_company_location
0,2024,SE,2,202730.0,0,M,5,4,4
1,2024,SE,2,92118.0,0,M,5,4,4
2,2024,SE,2,130500.0,0,M,2,4,4
3,2024,SE,2,96000.0,0,M,2,4,4
4,2024,SE,2,190000.0,0,M,5,4,4
...,...,...,...,...,...,...,...,...,...
16529,2020,SE,2,313062.5,100,L,4,4,4
16530,2021,MI,2,151000.0,100,L,6,4,4
16531,2020,EN,2,105000.0,100,S,4,4,4
16532,2020,EN,0,100000.0,100,L,0,4,4


In [31]:
# One hot encoding işlemini gerçekleştiren fonksiyon.
def one_hot_encoding(data_set, col_names):
    for col_name in col_names:
        one_hot_encoded = pd.get_dummies(data_set[col_name], prefix=col_name)
        data_set = pd.concat([data_set, one_hot_encoded], axis=1)
        data_set.drop(col_name, axis=1, inplace=True)
    return data_set

In [32]:
new_set = one_hot_encoding(df, ["employment_type", "new_employee_residence", "new_job_title", "new_company_location"])

In [33]:
# Verilerin one_hot_encoding() fonksiyonu uygulandığı halini veri setine entegre etme.
df = new_set

In [34]:
# Ordinal encoding işlemini gerçekleştiren fonksiyon.
def ordinal_encoding(data_set, col_name, categories):
    ol = OrdinalEncoder(categories=categories)
    data_set[col_name] = ol.fit_transform(data_set[[col_name]])
    return data_set

In [35]:
unique(df, "experience_level")

array(['SE', 'MI', 'EN', 'EX'], dtype=object)

In [36]:
experience = [["EN", "MI", "SE", "EX"]]

In [37]:
ordinal_encoding(df, "experience_level", experience)

Unnamed: 0,work_year,experience_level,salary_in_usd,remote_ratio,company_size,employment_type_0,employment_type_1,employment_type_2,employment_type_3,new_employee_residence_0,...,new_job_title_4,new_job_title_5,new_job_title_6,new_company_location_0,new_company_location_1,new_company_location_2,new_company_location_3,new_company_location_4,new_company_location_5,new_company_location_6
0,2024,2.0,202730.0,0,M,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
1,2024,2.0,92118.0,0,M,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
2,2024,2.0,130500.0,0,M,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
3,2024,2.0,96000.0,0,M,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
4,2024,2.0,190000.0,0,M,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16529,2020,2.0,313062.5,100,L,0,0,1,0,0,...,1,0,0,0,0,0,0,1,0,0
16530,2021,1.0,151000.0,100,L,0,0,1,0,0,...,0,0,1,0,0,0,0,1,0,0
16531,2020,0.0,105000.0,100,S,0,0,1,0,0,...,1,0,0,0,0,0,0,1,0,0
16532,2020,0.0,100000.0,100,L,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [38]:
company = [["S", "M", "", "L"]]

In [39]:
ordinal_encoding(df, "company_size", company)

Unnamed: 0,work_year,experience_level,salary_in_usd,remote_ratio,company_size,employment_type_0,employment_type_1,employment_type_2,employment_type_3,new_employee_residence_0,...,new_job_title_4,new_job_title_5,new_job_title_6,new_company_location_0,new_company_location_1,new_company_location_2,new_company_location_3,new_company_location_4,new_company_location_5,new_company_location_6
0,2024,2.0,202730.0,0,1.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
1,2024,2.0,92118.0,0,1.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
2,2024,2.0,130500.0,0,1.0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
3,2024,2.0,96000.0,0,1.0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
4,2024,2.0,190000.0,0,1.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16529,2020,2.0,313062.5,100,3.0,0,0,1,0,0,...,1,0,0,0,0,0,0,1,0,0
16530,2021,1.0,151000.0,100,3.0,0,0,1,0,0,...,0,0,1,0,0,0,0,1,0,0
16531,2020,0.0,105000.0,100,0.0,0,0,1,0,0,...,1,0,0,0,0,0,0,1,0,0
16532,2020,0.0,100000.0,100,3.0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [40]:
# Veri setinin tamamıyla sayısal hale gelmiş hali.
df

Unnamed: 0,work_year,experience_level,salary_in_usd,remote_ratio,company_size,employment_type_0,employment_type_1,employment_type_2,employment_type_3,new_employee_residence_0,...,new_job_title_4,new_job_title_5,new_job_title_6,new_company_location_0,new_company_location_1,new_company_location_2,new_company_location_3,new_company_location_4,new_company_location_5,new_company_location_6
0,2024,2.0,202730.0,0,1.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
1,2024,2.0,92118.0,0,1.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
2,2024,2.0,130500.0,0,1.0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
3,2024,2.0,96000.0,0,1.0,0,0,1,0,0,...,0,0,0,0,0,0,0,1,0,0
4,2024,2.0,190000.0,0,1.0,0,0,1,0,0,...,0,1,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16529,2020,2.0,313062.5,100,3.0,0,0,1,0,0,...,1,0,0,0,0,0,0,1,0,0
16530,2021,1.0,151000.0,100,3.0,0,0,1,0,0,...,0,0,1,0,0,0,0,1,0,0
16531,2020,0.0,105000.0,100,0.0,0,0,1,0,0,...,1,0,0,0,0,0,0,1,0,0
16532,2020,0.0,100000.0,100,3.0,1,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [41]:
# Verileri bağımlı ve bağımsız değişken olarak, sonrasında da eğitim ve test olarak ayıran fonksiyon.
def split(data_set, col_name):
    x = data_set.drop(col_name, axis=1).values
    y = data_set[col_name].values
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.33,random_state=0)
    return x_train, x_test, y_train, y_test

In [42]:
split_result = split(df, "salary_in_usd")

In [43]:
# Linear Model işlemini gerçekleştiren fonksiyon.
def linear_model(x_train, y_train, x_test):
    lin_reg = LinearRegression()
    lin_reg.fit(x_train, y_train)
    return lin_reg.predict(x_test)

In [44]:
# Tahmin etme.
linear_predict = linear_model(x_train=split_result[0], y_train=split_result[2], x_test=split_result[1])

In [45]:
# Performans değerlendirmesi yapan fonksiyon.
def performans(y_test, tahmin):
    return r2_score(y_test, linear_predict)

In [46]:
performans(split_result[3], linear_predict)

0.3060628188247272