<a href="https://colab.research.google.com/github/emredeveloper/Data-Preprocessing/blob/master/Feature_Engineering.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Veri seti örneği oluşturma
data = {
    'ID': range(1, 101),
    'Gender': np.random.choice(['M', 'F'], 100),
    'Age': np.random.randint(18, 70, 100),
    'Income': np.random.uniform(20000, 120000, 100),
    'Purchase Date': pd.date_range(start='2023-01-01', end='2023-06-30', freq='D')[:100],
    'Purchase Amount': np.random.uniform(50, 500, 100)
}

df = pd.DataFrame(data)

In [5]:
# 1. Veri Dönüştürme ve Temizleme
df['Purchase Date'] = pd.to_datetime(df['Purchase Date'])

In [6]:
df['Weekday Date'] = df['Purchase Date'].dt.weekday

In [7]:
df

Unnamed: 0,ID,Gender,Age,Income,Purchase Date,Purchase Amount,Weekday Date
0,1,M,40,99856.694752,1970-01-01 00:00:00.000000006,242.926659,3
1,2,M,46,100203.063038,1970-01-01 00:00:00.000000000,307.102760,3
2,3,F,22,30424.763817,1970-01-01 00:00:00.000000001,315.174469,3
3,4,M,47,36545.574280,1970-01-01 00:00:00.000000002,481.679260,3
4,5,F,41,115188.178597,1970-01-01 00:00:00.000000003,127.655131,3
...,...,...,...,...,...,...,...
95,96,F,20,97853.484600,1970-01-01 00:00:00.000000003,243.802266,3
96,97,M,39,114326.908565,1970-01-01 00:00:00.000000004,362.948329,3
97,98,F,69,109998.935246,1970-01-01 00:00:00.000000005,86.704599,3
98,99,F,61,78864.506765,1970-01-01 00:00:00.000000006,332.831494,3


In [8]:
# Yaş gruplarına ayırma (örneğin genç, yetişkin, yaşlı)
bins = [18, 30, 45, 60, 70]
labels = ['Young', 'Adult', 'Middle-aged', 'Senior']
df['Age Group'] = pd.cut(df['Age'], bins=bins, labels=labels, right=False)

In [9]:
df

Unnamed: 0,ID,Gender,Age,Income,Purchase Date,Purchase Amount,Weekday Date,Age Group
0,1,M,40,99856.694752,1970-01-01 00:00:00.000000006,242.926659,3,Adult
1,2,M,46,100203.063038,1970-01-01 00:00:00.000000000,307.102760,3,Middle-aged
2,3,F,22,30424.763817,1970-01-01 00:00:00.000000001,315.174469,3,Young
3,4,M,47,36545.574280,1970-01-01 00:00:00.000000002,481.679260,3,Middle-aged
4,5,F,41,115188.178597,1970-01-01 00:00:00.000000003,127.655131,3,Adult
...,...,...,...,...,...,...,...,...
95,96,F,20,97853.484600,1970-01-01 00:00:00.000000003,243.802266,3,Young
96,97,M,39,114326.908565,1970-01-01 00:00:00.000000004,362.948329,3,Adult
97,98,F,69,109998.935246,1970-01-01 00:00:00.000000005,86.704599,3,Senior
98,99,F,61,78864.506765,1970-01-01 00:00:00.000000006,332.831494,3,Senior


In [10]:
# 3. Veri Standardizasyonu veya Normalizasyonu
scaler = StandardScaler()
df[['Age', 'Income', 'Purchase Amount']] = scaler.fit_transform(df[['Age', 'Income', 'Purchase Amount']])

# 4. Kategorik Değişkenlerin Kodlanması
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])
df['Age Group'] = le.fit_transform(df['Age Group'])

In [11]:
df

Unnamed: 0,ID,Gender,Age,Income,Purchase Date,Purchase Amount,Weekday Date,Age Group
0,1,1,-0.148707,1.153744,1970-01-01 00:00:00.000000006,-0.149352,3,0
1,2,1,0.244351,1.164884,1970-01-01 00:00:00.000000000,0.333256,3,1
2,3,0,-1.327883,-1.079388,1970-01-01 00:00:00.000000001,0.393956,3,3
3,4,1,0.309861,-0.882525,1970-01-01 00:00:00.000000002,1.646081,3,1
4,5,0,-0.083197,1.646849,1970-01-01 00:00:00.000000003,-1.016201,3,0
...,...,...,...,...,...,...,...,...
95,96,0,-1.458903,1.089315,1970-01-01 00:00:00.000000003,-0.142768,3,3
96,97,1,-0.214217,1.619148,1970-01-01 00:00:00.000000004,0.753218,3,0
97,98,0,1.751076,1.479948,1970-01-01 00:00:00.000000005,-1.324151,3,2
98,99,0,1.226998,0.478574,1970-01-01 00:00:00.000000006,0.526737,3,2


In [12]:
import pandas as pd
from datetime import datetime

# Assuming df is your DataFrame
df = pd.DataFrame({
    'square_footage': [1500, 2000, 1200],
    'number_of_bedrooms': [3, 4, 2],
    'number_of_bathrooms': [2, 2.5, 2],
    'year_built': [1990, 2005, 1985]
})

# Get the current year
current_year = datetime.now().year

# Create a new feature 'age_of_house'
df['age_of_house'] = current_year - df['year_built']

print(df)


   square_footage  number_of_bedrooms  number_of_bathrooms  year_built  \
0            1500                   3                  2.0        1990   
1            2000                   4                  2.5        2005   
2            1200                   2                  2.0        1985   

   age_of_house  
0            34  
1            19  
2            39  


In [14]:
import pandas as pd

# df DataFrame'iniz olduğunu varsayalım
df = pd.DataFrame({
    'hesap_bakiyesi': [5000, 10000, 7500],
    'kredi_skoru': [750, 800, 650],
    'yıllık_geliri': [60000, 80000, 50000],
    'kredi_miktarı': [10000, 20000, 15000],
    'kredi_vadesi': [5, 10, 7],
    'kredi_amacı': ['ev', 'araba', 'eğitim']
})

# Yeni bir özellik 'gelir_kredi_oranı' oluşturun
df['gelir_kredi_oranı'] = df['yıllık_geliri'] / df['kredi_miktarı'] # Yıllık gelir ile kredi miktarı arasındaki oran

print(df)


   hesap_bakiyesi  kredi_skoru  yıllık_geliri  kredi_miktarı  kredi_vadesi  \
0            5000          750          60000          10000             5   
1           10000          800          80000          20000            10   
2            7500          650          50000          15000             7   

  kredi_amacı  gelir_kredi_oranı  
0          ev           6.000000  
1       araba           4.000000  
2      eğitim           3.333333  


In [16]:
import pandas as pd

# df DataFrame'iniz olduğunu varsayalım
df = pd.DataFrame({
    'ürün_fiyatı': [50, 100, 75],
    'ürün_kategorisi': ['elektronik', 'giyim', 'ev'],
    'ürün_stok_miktarı': [1000, 500, 800],
    'satış_miktarı': [200, 100, 300],
    'satış_tarihi': ['2022-01-01', '2022-01-02', '2022-01-03'],
    'müşteri_konumu': ['İstanbul', 'Ankara', 'İzmir']
})

# Yeni bir özellik 'stok_satış_oranı' oluşturun
df['stok_satış_oranı'] = df['ürün_stok_miktarı'] / df['satış_miktarı']

df


Unnamed: 0,ürün_fiyatı,ürün_kategorisi,ürün_stok_miktarı,satış_miktarı,satış_tarihi,müşteri_konumu,stok_satış_oranı
0,50,elektronik,1000,200,2022-01-01,İstanbul,5.0
1,100,giyim,500,100,2022-01-02,Ankara,5.0
2,75,ev,800,300,2022-01-03,İzmir,2.666667
