In [83]:
%matplotlib inline
%load_ext autoreload
%autoreload 2
import os
import sys

import csv
import datetime
import itertools
import numpy as np
import pandas as pd
import random
import re
import sklearn
import time
from collections import defaultdict

import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style("whitegrid")
sns.set_context(rc={
       "figure.figsize": (16, 10),
       "axes.titlesize": 14})

from IPython.display import Image, display
from IPython.core.display import HTML
HTML("<style>.container { width:100% !important; }</style>")

from os.path import expanduser
sys.path.insert(1, '{}/datsci'.format(expanduser('~')))
from datsci import eda, munge
from datsci import kaggle as kg

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [84]:
import santander

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier as KNC
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.ensemble import RandomForestRegressor as RFR
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import AdaBoostClassifier as ABC
from sklearn.ensemble import GradientBoostingClassifier as GBC
from sklearn.linear_model import SGDClassifier as SGDClf

from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import roc_auc_score
from sklearn.metrics import roc_curve, auc
from sklearn.metrics import make_scorer

import xgboost as xgb

In [3]:
# Training-set CSVs: the raw input plus the file written by each preprocessing
# stage in this notebook.  Suffixes accumulate in the order the stages run.
FILE_TRAIN                                 = 'data/train.csv'
FILE_TRAIN_DEDUP                           = 'data/train.dedup.csv'
FILE_TRAIN_DEDUP_ONEHOT                    = 'data/train.dedup.onehot.csv'
FILE_TRAIN_DEDUP_ONEHOT_NA                 = 'data/train.dedup.onehot.na.csv'
FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_MEAN     = 'data/train.dedup.onehot.na.impute_mean.csv'
FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_MEDIAN   = 'data/train.dedup.onehot.na.impute_median.csv'
FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_FREQ     = 'data/train.dedup.onehot.na.impute_freq.csv'
FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT       = 'data/train.dedup.onehot.na.onehotint.csv'
FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST = 'data/train.dedup.onehot.na.onehotint.1test.csv'

# Test-set CSVs, one counterpart per training file above.
FILE_TEST                                  = 'data/test.csv'
FILE_TEST_DEDUP                            = 'data/test.dedup.csv'
FILE_TEST_DEDUP_ONEHOT                     = 'data/test.dedup.onehot.csv'
FILE_TEST_DEDUP_ONEHOT_NA                  = 'data/test.dedup.onehot.na.csv'
FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_MEAN      = 'data/test.dedup.onehot.na.impute_mean.csv'
FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_MEDIAN    = 'data/test.dedup.onehot.na.impute_median.csv'
FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_FREQ      = 'data/test.dedup.onehot.na.impute_freq.csv'
FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT        = 'data/test.dedup.onehot.na.onehotint.csv'
FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST  = 'data/test.dedup.onehot.na.onehotint.1test.csv'

# Sample submission path (not referenced by any cell in this section).
FILE_SAMPLE_SUBMIT                         = 'data/sample_submission.csv'

# Column that every loader below removes from the training columns to obtain
# the feature list.
TARGET_COL                                 = 'TARGET'

In [173]:
def get_sizes(train_csv, test_csv):
    """Return ``(train_rows, train_cols, test_rows, test_cols)`` for a CSV pair.

    The training file is read with the default integer index, so every column
    in the file is counted.  The test file is read with ``ID`` as its index,
    so ``ID`` is not part of the test column count.
    """
    train_shape = pd.read_csv(train_csv).shape
    test_shape = pd.read_csv(test_csv, index_col='ID').shape
    # DataFrame shapes are 2-tuples; concatenating them yields the 4-tuple.
    return train_shape + test_shape


# (stage label, training CSV, test CSV) for every preprocessing stage,
# listed in the order the stages are produced.
stage_files = [
    ('raw',           FILE_TRAIN,                                 FILE_TEST),
    ('dedup',         FILE_TRAIN_DEDUP,                           FILE_TEST_DEDUP),
    ('bin onehot',    FILE_TRAIN_DEDUP_ONEHOT,                    FILE_TEST_DEDUP_ONEHOT),
    ('NaN',           FILE_TRAIN_DEDUP_ONEHOT_NA,                 FILE_TEST_DEDUP_ONEHOT_NA),
    ('impute mean',   FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_MEAN,     FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_MEAN),
    ('impute median', FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_MEDIAN,   FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_MEDIAN),
    ('impute freq',   FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_FREQ,     FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_FREQ),
    ('onehot int',    FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT,       FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT),
    ('rm test const', FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST, FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST),
]

# One row per stage: the label followed by the four sizes from get_sizes.
data_shapes = []
for s, train_csv, test_csv in stage_files:
    sizes = get_sizes(train_csv, test_csv)
    data_shapes.append((s,) + sizes)

shape_columns = ['stage', 'train rows', 'train cols', 'test rows', 'test cols']
pd.DataFrame(data_shapes, columns=shape_columns)

Unnamed: 0,stage,train rows,train cols,test rows,test cols
0,raw,76020,371,75818,369
1,dedup,71213,307,75818,306
2,bin onehot,71213,363,75818,362
3,,71213,357,75818,356
4,impute mean,71213,357,75818,356
5,impute median,71179,357,75818,356
6,impute freq,71179,357,75818,356
7,onehot int,71213,398,75818,397
8,rm test const,71213,390,75818,389


## Fix 'delta' cols that contain 9999999999

In [86]:
df_train, df_test, feature_cols = santander.read_data(FILE_TRAIN, FILE_TEST)

In [87]:
# Columns of the training frame in which the placeholder value 9999999999
# occurs.  Values are looked up in df_train, the frame loaded in the previous
# cell and the one being iterated; the earlier `df[c]` lookup only worked when
# an unrelated `df` happened to be left over in the kernel.
ratio_cols = [c for c in df_train if 9999999999 in df_train[c].unique()]

# Columns whose name begins with 'delta'.
delta_cols = [c for c in df_train if c.startswith('delta')]

In [6]:
len(ratio_cols), len(delta_cols), ratio_cols == delta_cols

(26, 26, True)

In [7]:
ratio_cols

['delta_imp_amort_var18_1y3',
 'delta_imp_amort_var34_1y3',
 'delta_imp_aport_var13_1y3',
 'delta_imp_aport_var17_1y3',
 'delta_imp_aport_var33_1y3',
 'delta_imp_compra_var44_1y3',
 'delta_imp_reemb_var13_1y3',
 'delta_imp_reemb_var17_1y3',
 'delta_imp_reemb_var33_1y3',
 'delta_imp_trasp_var17_in_1y3',
 'delta_imp_trasp_var17_out_1y3',
 'delta_imp_trasp_var33_in_1y3',
 'delta_imp_trasp_var33_out_1y3',
 'delta_imp_venta_var44_1y3',
 'delta_num_aport_var13_1y3',
 'delta_num_aport_var17_1y3',
 'delta_num_aport_var33_1y3',
 'delta_num_compra_var44_1y3',
 'delta_num_reemb_var13_1y3',
 'delta_num_reemb_var17_1y3',
 'delta_num_reemb_var33_1y3',
 'delta_num_trasp_var17_in_1y3',
 'delta_num_trasp_var17_out_1y3',
 'delta_num_trasp_var33_in_1y3',
 'delta_num_trasp_var33_out_1y3',
 'delta_num_venta_var44_1y3']

In [77]:
# Value distribution of one placeholder-containing column in the training
# frame.  Uses df_train (loaded by santander.read_data above); no earlier cell
# in this notebook defines a frame called `df`.
x = 17
c = ratio_cols[x]
df_train[c].value_counts()

 0.000000e+00    75923
 1.000000e+10       70
-1.000000e+00       18
 1.000000e+00        3
 4.000000e+00        2
-3.333333e-01        1
-6.666667e-01        1
 2.500000e+00        1
 5.000000e-01        1
Name: delta_num_compra_var44_1y3, dtype: int64

In [78]:
df_test[c].value_counts()

 0.000000e+00    75716
 1.000000e+10       78
-1.000000e+00       14
 1.000000e+00        3
-8.333333e-01        1
-6.666667e-01        1
 2.000000e-01        1
-8.666667e-01        1
 5.000000e+00        1
-5.000000e-01        1
-7.500000e-01        1
Name: delta_num_compra_var44_1y3, dtype: int64

In [65]:
# Summary statistics of column `c` in the training frame with the 9999999999
# placeholder rows excluded.  Uses df_train, the training frame loaded above;
# `df` is not defined by any earlier cell.
df_train.loc[df_train[c] != 9999999999, c].describe()

count    75650.000000
mean        -0.022113
std          0.147300
min         -1.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          1.000000
Name: delta_num_aport_var13_1y3, dtype: float64

In [66]:
df_test[df_test[c] != 9999999999][c].describe()

count    75441.000000
mean        -0.021805
std          0.147538
min         -1.000000
25%          0.000000
50%          0.000000
75%          0.000000
max          3.000000
Name: delta_num_aport_var13_1y3, dtype: float64

In [66]:
# Shapes of the loaded train/test frames (df_train is the training frame
# loaded above; `df` is not defined by any earlier cell).
df_train.shape, df_test.shape

((76020, 372), (75818, 370))

In [67]:
# Run munge.hash_features (the helper the one-hot stages below use) on the
# single column under inspection, in both frames.  The training frame loaded
# above is df_train; `df` is not defined by any earlier cell.
# NOTE(review): this rebinds df_train/df_test, so re-running the cell applies
# the encoding again -- reload the data first if that matters.
df_train = munge.hash_features(df_train, columns=[c])
df_test = munge.hash_features(df_test, columns=[c])

In [68]:
# Shapes of the train/test frames at this point (df_train is the training
# frame loaded above; `df` is not defined by any earlier cell).
df_train.shape, df_test.shape

((76020, 373), (75818, 371))

In [None]:
(a - b) / b

In [None]:
a b c
0 0 0
0 1 1
1 0 
1 1 0

In [None]:
change 9999999999 to a very negative number, e.g. -10
change 9999999999 to a very positive number, e.g. +10
change 9999999999 to 1
just add 3 columns (increased, decreased, stayed the same), and then change 9999999999 to null
add the 3 columns and remove the original column

## Remove duplicates

In [3]:
def dedup_bak():
    """In-notebook version of the de-duplication stage (not called in this cell).

    Reads FILE_TRAIN, drops duplicate rows and constant columns, removes
    duplicate columns followed by a second duplicate-row pass, and writes the
    result to FILE_TRAIN_DEDUP without the ID index.  The surviving feature
    columns are then selected from FILE_TEST and written to FILE_TEST_DEDUP
    with the ID index kept.
    """
    # Load the raw training data and drop exact duplicate rows
    train = pd.read_csv(FILE_TRAIN, index_col='ID')
    train = train.drop_duplicates()

    # Drop columns reported as constant
    train = train.drop(eda.find_const_cols(train), axis=1)

    # Duplicate columns are handled on the transposed frame; rows are then
    # de-duplicated once more on the result
    train = munge.remove_duplicates(train.T).T.drop_duplicates()
    train.to_csv(FILE_TRAIN_DEDUP, index=False)

    # Everything except the target is a feature
    kept_features = list(train.columns)
    kept_features.remove(TARGET_COL)

    # Keep the same feature columns in the test data
    test = pd.read_csv(FILE_TEST, index_col='ID')
    test[kept_features].to_csv(FILE_TEST_DEDUP)
        
# Run the de-duplication stage unless BOTH of its output files already exist.
# Checking only the training file would leave a missing test file undetected
# until a later stage tries to read it.
if not (os.path.exists(FILE_TRAIN_DEDUP) and os.path.exists(FILE_TEST_DEDUP)):
    santander.csv_remove_duplicates_const(FILE_TRAIN, FILE_TEST,
                                          FILE_TRAIN_DEDUP, FILE_TEST_DEDUP)

## One-hot encode binary features

In [4]:
def one_hot_binary_bak():
    """In-notebook version of the binary one-hot stage (not called in this cell).

    Reads the de-duplicated train/test CSVs, passes every training column
    whose name starts with 'ind_' through munge.hash_features in both frames,
    and writes the results to FILE_TRAIN_DEDUP_ONEHOT (no index) and
    FILE_TEST_DEDUP_ONEHOT (ID index kept).
    """
    df = pd.read_csv(FILE_TRAIN_DEDUP)
    df_test = pd.read_csv(FILE_TEST_DEDUP, index_col='ID')

    binary_cols = [c for c in df.columns if c.startswith('ind_')]

    # Convert the training indicators to int before encoding; the test frame
    # is passed to the encoder unchanged.
    for c in binary_cols:
        df[c] = df[c].values.astype(int)

    df_onehot = munge.hash_features(df, columns=binary_cols)
    df_test_onehot = munge.hash_features(df_test, columns=binary_cols)

    df_onehot.to_csv(FILE_TRAIN_DEDUP_ONEHOT, index=False)
    df_test_onehot.to_csv(FILE_TEST_DEDUP_ONEHOT)


# Run the binary one-hot stage unless BOTH of its output files already exist
# (a missing test file would otherwise go unnoticed until a later stage).
if not (os.path.exists(FILE_TRAIN_DEDUP_ONEHOT) and os.path.exists(FILE_TEST_DEDUP_ONEHOT)):
    santander.csv_one_hot_encode_binary_features(FILE_TRAIN_DEDUP, FILE_TEST_DEDUP,
                                                 FILE_TRAIN_DEDUP_ONEHOT, FILE_TEST_DEDUP_ONEHOT)

## Process known NaNs

https://www.kaggle.com/c/santander-customer-satisfaction/forums/t/19291/data-dictionary/111360#post111360

In [5]:
def process_known_nans():
    """Replace the known missing-value placeholders with NaN and re-deduplicate.

    Reads FILE_TRAIN_DEDUP_ONEHOT / FILE_TEST_DEDUP_ONEHOT and, in both frames:

    * replaces -999999 in ``var3`` with NaN;
    * replaces 9999999999 with NaN in every feature column whose maximum in
      the training data is exactly 9999999999.

    Constant columns and duplicate columns/rows are then removed from the
    training frame, which is written to FILE_TRAIN_DEDUP_ONEHOT_NA without an
    index.  The test frame, restricted to the surviving feature columns, is
    written to FILE_TEST_DEDUP_ONEHOT_NA with its ID index.
    """
    df = pd.read_csv(FILE_TRAIN_DEDUP_ONEHOT)
    feature_cols = list(df.columns)
    feature_cols.remove(TARGET_COL)
    df_test = pd.read_csv(FILE_TEST_DEDUP_ONEHOT, index_col='ID')

    # Var3
    df['var3'] = df['var3'].replace(-999999, np.nan)
    df_test['var3'] = df_test['var3'].replace(-999999, np.nan)

    # Find features whose training maximum is the 9999999999 placeholder.
    # Series.max() yields the same number as describe()['max'] without
    # computing a full summary for every column.
    for c in feature_cols:
        if df[c].max() == 9999999999:
            df[c] = df[c].replace(9999999999, np.nan)
            df_test[c] = df_test[c].replace(9999999999, np.nan)

    # Remove constant columns
    df = df.drop(eda.find_const_cols(df), axis=1)

    # Remove duplicate columns and then rows again
    df = munge.remove_duplicates(df.T).T.drop_duplicates()

    # Write to file; the test frame keeps only the surviving feature columns
    df.to_csv(FILE_TRAIN_DEDUP_ONEHOT_NA, index=False)
    feature_cols = list(df.columns)
    feature_cols.remove(TARGET_COL)
    df_test[feature_cols].to_csv(FILE_TEST_DEDUP_ONEHOT_NA)
    
    
# Run the NaN-processing stage unless BOTH of its output files already exist
# (a missing test file would otherwise go unnoticed until a later stage).
if not (os.path.exists(FILE_TRAIN_DEDUP_ONEHOT_NA) and os.path.exists(FILE_TEST_DEDUP_ONEHOT_NA)):
    process_known_nans()

## Fill in null values

In [225]:
from sklearn.preprocessing import Imputer

def impute_null_vals(train_csv, test_csv, train_out_csv, test_out_csv, strategy='mean'):
    """Fill NaNs in the train/test feature columns and write both frames to CSV.

    The imputer is fitted on the training and test feature rows stacked
    together and then applied to each frame separately.  Afterwards duplicate
    columns and rows are removed from the training frame, and the test frame
    is reduced to the feature columns that survive.

    Parameters
    ----------
    train_csv, test_csv : str
        Input CSV paths.  The test file is read with ``ID`` as its index.
    train_out_csv, test_out_csv : str
        Output CSV paths.  The training frame is written without an index,
        the test frame with its ``ID`` index.
    strategy : str
        Passed through to ``Imputer`` (imported at the top of this cell).
    """
    # Read in data
    df = pd.read_csv(train_csv)
    feature_cols = list(df.columns)
    feature_cols.remove(TARGET_COL)
    df_test = pd.read_csv(test_csv, index_col='ID')

    # Impute using combined (train + test) datasets.  pd.concat stacks the
    # rows exactly as DataFrame.append did, and still exists in pandas >= 2.0
    # where DataFrame.append was removed.
    df_combined = pd.concat([df[feature_cols], df_test[feature_cols]])
    imputer = Imputer(missing_values='NaN', strategy=strategy, axis=0, verbose=0, copy=False).fit(df_combined)
    df[feature_cols] = imputer.transform(df[feature_cols])
    df_test[feature_cols] = imputer.transform(df_test[feature_cols])

    # Remove duplicate columns and rows, then recompute the surviving features
    df = munge.remove_duplicates(df.T).T.drop_duplicates()
    feature_cols = list(df.columns)
    feature_cols.remove(TARGET_COL)

    # Write to file
    df.to_csv(train_out_csv, index=False)
    df_test[feature_cols].to_csv(test_out_csv)
    
    
# Build one imputed train/test pair per strategy.  A pair is (re)built unless
# BOTH of its output files already exist, so a missing test file is not
# silently skipped.
for strategy, train_out_csv, test_out_csv in [
    ('mean',          FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_MEAN,   FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_MEAN),
    ('median',        FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_MEDIAN, FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_MEDIAN),
    ('most_frequent', FILE_TRAIN_DEDUP_ONEHOT_NA_IMPUTE_FREQ,   FILE_TEST_DEDUP_ONEHOT_NA_IMPUTE_FREQ),
]:
    if not (os.path.exists(train_out_csv) and os.path.exists(test_out_csv)):
        impute_null_vals(
            FILE_TRAIN_DEDUP_ONEHOT_NA, FILE_TEST_DEDUP_ONEHOT_NA,
            train_out_csv,              test_out_csv,
            strategy=strategy
        )

## Turn some of the integer columns to categorical features

In [220]:
def one_hot_int():
    """Encode the categorical-looking numeric columns with munge.hash_features.

    Reads FILE_TRAIN_DEDUP_ONEHOT_NA / FILE_TEST_DEDUP_ONEHOT_NA, asks
    eda.find_categorical_columns which of the not-yet-encoded columns are
    categorical, encodes those columns in both frames, removes duplicate
    columns/rows from the training frame, and writes the results to
    FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT (no index) and
    FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT (ID index, same feature columns).
    """
    df = pd.read_csv(FILE_TRAIN_DEDUP_ONEHOT_NA)
    feature_cols = list(df.columns)
    feature_cols.remove(TARGET_COL)
    df_test = pd.read_csv(FILE_TEST_DEDUP_ONEHOT_NA, index_col='ID')

    # Ignore already one-hot encoded columns
    int_cols = [c for c in feature_cols if not c.startswith('onehot')]

    # Find categorical columns; each entry is a pair whose first item is the
    # column name
    categorical_cols = eda.find_categorical_columns(df[int_cols], df_test)
    catcols = [t[0] for t in categorical_cols]

    # Convert the training columns to integers, except those holding nulls.
    # The two names below were always excluded; any other column containing
    # NaN is skipped too, because NumPy's astype(int) does not raise on NaN
    # and would silently store meaningless integers.
    known_null_cols = {'delta_imp_trasp_var17_in_1y3', 'delta_imp_trasp_var33_in_1y3'}
    for c in catcols:
        if c in known_null_cols or df[c].isnull().any():
            continue
        df[c] = df[c].values.astype(int)

    # One-hot encode the categorical columns
    df_onehot = munge.hash_features(df, columns=catcols)
    df_test_onehot = munge.hash_features(df_test, columns=catcols)

    # Remove duplicate columns and rows, and keep the same features in test
    df_onehot = munge.remove_duplicates(df_onehot.T).T.drop_duplicates()
    feature_cols = list(df_onehot.columns)
    feature_cols.remove(TARGET_COL)
    df_test_onehot = df_test_onehot[feature_cols]

    # Save to file
    df_onehot.to_csv(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT, index=False)
    df_test_onehot.to_csv(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT)

    
# Run the integer one-hot stage unless BOTH of its output files already exist
# (a missing test file would otherwise go unnoticed until a later stage).
if not (os.path.exists(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT)
        and os.path.exists(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT)):
    one_hot_int()

## Find more categorical columns

### Search for equal num unique, but not equal unique vals

In [3]:
# df = pd.read_csv(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT)
# feature_cols = list(df.columns)
# feature_cols.remove(TARGET_COL)
# df_test = pd.read_csv(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT, index_col='ID')

# int_cols = feature_cols[:]
# for c in feature_cols:
#     if c[:6] == 'onehot':
#         int_cols.remove(c)
        
# same_counts = []
# for c in int_cols:
#     if df[c].nunique() - 1 == df_test[c].nunique():
#         same_counts.append(c)
        
# x = 20
# c = same_counts[x]
# print(c)
# df[c].value_counts()

# df_test[c].value_counts()

## Remove columns that are constant in test file

In [171]:
def remove_test_const_cols():
    """Drop, from both frames, the columns that are constant in the test data.

    Reads FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT / FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT,
    removes every column eda.find_const_cols reports for the test frame from
    both frames, drops the duplicate training rows that result, and writes the
    frames to FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST (no index) and
    FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST (ID index).
    """
    df = pd.read_csv(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT)
    df_test = pd.read_csv(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT, index_col='ID')

    # Find const cols in test file
    test_const_cols = eda.find_const_cols(df_test)

    # Remove const cols from both frames; removing columns can leave
    # duplicate training rows, so drop those as well
    df = df.drop(test_const_cols, axis=1).drop_duplicates()
    df_test = df_test.drop(test_const_cols, axis=1)

    # Save to file
    df.to_csv(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST, index=False)
    df_test.to_csv(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST)
    
# Run the test-constant-column stage unless BOTH of its output files already
# exist (a missing test file would otherwise go unnoticed when loading below).
if not (os.path.exists(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST)
        and os.path.exists(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST)):
    remove_test_const_cols()

## Generate synthetic records to even out unbalanced data

In [175]:
# Load the train/test pair written by the last preprocessing stage.
df = pd.read_csv(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST)
df_test = pd.read_csv(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST, index_col='ID')

# Every training column except the target is a feature.
feature_cols = df.columns.tolist()
feature_cols.remove(TARGET_COL)

In [176]:
df[TARGET_COL].value_counts()

0.0    68398
1.0     2815
Name: TARGET, dtype: int64

In [178]:
float(68398) / 2815

24.297690941385437

In [160]:
# Reload the final preprocessed train/test pair.  These four statements repeat
# the load cell at the start of this section verbatim, so df, feature_cols and
# df_test are reset to their freshly-read state here.
df = pd.read_csv(FILE_TRAIN_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST)
feature_cols = list(df.columns)
feature_cols.remove(TARGET_COL)
df_test = pd.read_csv(FILE_TEST_DEDUP_ONEHOT_NA_ONEHOTINT_1TEST, index_col='ID')

In [162]:
# Names of all columns that contain the substring 'saldo', in frame order.
saldo_cols = [c for c in df if 'saldo' in c]

In [164]:
# List every column whose name mentions 'var13', one per line.
for c in df.columns:
    if 'var13' in c:
        print(c)

num_var13_0
num_var13_largo_0
num_var13_largo
num_var13_medio_0
num_var13
saldo_var13_corto
saldo_var13_largo
saldo_var13_medio
saldo_var13
delta_imp_aport_var13_1y3
delta_num_aport_var13_1y3
imp_aport_var13_hace3
imp_aport_var13_ult1
imp_reemb_var13_ult1
num_aport_var13_hace3
num_aport_var13_ult1
saldo_medio_var13_corto_hace2
saldo_medio_var13_corto_hace3
saldo_medio_var13_corto_ult1
saldo_medio_var13_corto_ult3
saldo_medio_var13_largo_hace2
saldo_medio_var13_largo_hace3
saldo_medio_var13_largo_ult1
saldo_medio_var13_largo_ult3
saldo_medio_var13_medio_hace2
saldo_medio_var13_medio_ult3
onehot_ind_var13_0_0
onehot_ind_var13_0_1
onehot_ind_var13_corto_0_0
onehot_ind_var13_corto_0_1
onehot_ind_var13_corto_0
onehot_ind_var13_corto_1
onehot_ind_var13_largo_0_0
onehot_ind_var13_largo_0_1
onehot_ind_var13_largo_0
onehot_ind_var13_largo_1
onehot_ind_var13_medio_0_0
onehot_ind_var13_medio_0_1
onehot_ind_var13_0
onehot_ind_var13_1
onehot_num_reemb_var13_ult1_0
onehot_num_reemb_var13_ult1_3
oneh

In [163]:
saldo_cols

['saldo_var1',
 'saldo_var5',
 'saldo_var6',
 'saldo_var8',
 'saldo_var12',
 'saldo_var13_corto',
 'saldo_var13_largo',
 'saldo_var13_medio',
 'saldo_var13',
 'saldo_var14',
 'saldo_var17',
 'saldo_var18',
 'saldo_var20',
 'saldo_var24',
 'saldo_var26',
 'saldo_var25',
 'saldo_var30',
 'saldo_var31',
 'saldo_var32',
 'saldo_var33',
 'saldo_var34',
 'saldo_var37',
 'saldo_var40',
 'saldo_var42',
 'saldo_var44',
 'saldo_medio_var5_hace2',
 'saldo_medio_var5_hace3',
 'saldo_medio_var5_ult1',
 'saldo_medio_var5_ult3',
 'saldo_medio_var8_hace2',
 'saldo_medio_var8_hace3',
 'saldo_medio_var8_ult1',
 'saldo_medio_var8_ult3',
 'saldo_medio_var12_hace2',
 'saldo_medio_var12_hace3',
 'saldo_medio_var12_ult1',
 'saldo_medio_var12_ult3',
 'saldo_medio_var13_corto_hace2',
 'saldo_medio_var13_corto_hace3',
 'saldo_medio_var13_corto_ult1',
 'saldo_medio_var13_corto_ult3',
 'saldo_medio_var13_largo_hace2',
 'saldo_medio_var13_largo_hace3',
 'saldo_medio_var13_largo_ult1',
 'saldo_medio_var13_largo_ult

In [169]:
np.array_equal(df[['saldo_var13_corto', 'saldo_var13_largo', 'saldo_var13_medio']].sum(axis=1).values, df['saldo_var13'].values)

True

In [170]:
df[['saldo_var13_corto', 'saldo_var13_largo', 'saldo_var13_medio']].head()

Unnamed: 0,saldo_var13_corto,saldo_var13_largo,saldo_var13_medio
0,0.0,0.0,0.0
1,300.0,0.0,0.0
2,0.0,0.0,0.0
3,0.0,0.0,0.0
4,0.0,0.0,0.0
