Let's use 'kser' rather than 'ks' or 's' as variable name for Series (#1052)

* Let's use 'kser' rather than ks or s for Series

* add

* add missing

* add missing
itholic authored and HyukjinKwon committed Nov 18, 2019
1 parent 169e38e commit 221b5a2
Showing 8 changed files with 81 additions and 81 deletions.
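The diff below renames every Series-valued variable from `ks` (or `s`) to `kser`. A minimal sketch of why the old name was a problem, assuming the usual `import databricks.koalas as ks` alias from the koalas docs (illustration only, not part of the commit):

    # A Series variable named `ks` shadows the conventional module alias;
    # `kser` keeps the alias usable after the loop.
    import databricks.koalas as ks

    kdf = ks.DataFrame({'a': [1.0, None], 'b': [None, 2.0]})
    for name, kser in kdf.iteritems():   # 'kser', not 'ks'
        kdf[name] = kser.isnull()
    other = ks.DataFrame({'c': [3]})     # `ks` still refers to the module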
8 changes: 4 additions & 4 deletions databricks/koalas/frame.py
@@ -2464,8 +2464,8 @@ def isnull(self):
         1  False   True  False
         """
         kdf = self.copy()
-        for name, ks in kdf.iteritems():
-            kdf[name] = ks.isnull()
+        for name, kser in kdf.iteritems():
+            kdf[name] = kser.isnull()
         return kdf
 
     isna = isnull
@@ -2499,8 +2499,8 @@ def notnull(self):
         1   True  False   True
         """
         kdf = self.copy()
-        for name, ks in kdf.iteritems():
-            kdf[name] = ks.notnull()
+        for name, kser in kdf.iteritems():
+            kdf[name] = kser.notnull()
         return kdf
 
     notna = notnull
12 changes: 6 additions & 6 deletions databricks/koalas/generic.py
@@ -1525,17 +1525,17 @@ def _resolve_col(kdf, col_like):
         raise ValueError(col_like)
 
 
-def _spark_col_apply(kdf_or_ks, sfun):
+def _spark_col_apply(kdf_or_kser, sfun):
     """
     Performs a function to all cells on a dataframe, the function being a known sql function.
     """
     from databricks.koalas.frame import DataFrame
     from databricks.koalas.series import Series
-    if isinstance(kdf_or_ks, Series):
-        ks = kdf_or_ks
-        return ks._with_new_scol(sfun(ks._scol))
-    assert isinstance(kdf_or_ks, DataFrame)
-    kdf = kdf_or_ks
+    if isinstance(kdf_or_kser, Series):
+        kser = kdf_or_kser
+        return kser._with_new_scol(sfun(kser._scol))
+    assert isinstance(kdf_or_kser, DataFrame)
+    kdf = kdf_or_kser
     sdf = kdf._sdf
     sdf = sdf.select([sfun(kdf._internal.scol_for(col)).alias(col) for col in kdf.columns])
     return DataFrame(sdf)
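A hedged usage sketch of the renamed helper above (assumptions: the private `_spark_col_apply` is called directly, with `pyspark.sql.functions.abs` standing in for a known SQL function):

    from pyspark.sql import functions as F
    import databricks.koalas as ks
    from databricks.koalas.generic import _spark_col_apply

    kdf = ks.DataFrame({'x': [-1.0, 2.0, -3.0]})
    abs_kdf = _spark_col_apply(kdf, F.abs)        # DataFrame branch: select over all columns
    abs_kser = _spark_col_apply(kdf['x'], F.abs)  # Series branch: kser._with_new_scol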
64 changes: 32 additions & 32 deletions databricks/koalas/groupby.py
@@ -702,7 +702,7 @@ def cumprod(scol):
             #
             # This is a bit hacky. Maybe we should fix it.
 
-            return_type = self._ks._internal.spark_type_for(self._ks._internal.column_index[0])
+            return_type = self._kser._internal.spark_type_for(self._kser._internal.column_index[0])
 
             @pandas_udf(returnType=return_type)
             def negative_check(s):
@@ -1143,13 +1143,13 @@ def idxmax(self, skipna=True):
         index = self._kdf._internal.index_columns[0]
 
         stat_exprs = []
-        for ks in self._agg_columns:
-            name = ks._internal.data_columns[0]
+        for kser in self._agg_columns:
+            name = kser._internal.data_columns[0]
 
             if skipna:
-                order_column = Column(ks._scol._jc.desc_nulls_last())
+                order_column = Column(kser._scol._jc.desc_nulls_last())
             else:
-                order_column = Column(ks._scol._jc.desc_nulls_first())
+                order_column = Column(kser._scol._jc.desc_nulls_first())
             window = Window.partitionBy(groupkey_cols).orderBy(order_column)
             sdf = sdf.withColumn(name,
                                  F.when(F.row_number().over(window) == 1, scol_for(sdf, index))
@@ -1160,10 +1160,10 @@ def idxmax(self, skipna=True):
                               index_map=[(SPARK_INDEX_NAME_FORMAT(i),
                                           s._internal.column_index[0])
                                          for i, s in enumerate(groupkeys)],
-                              column_index=[ks._internal.column_index[0]
-                                            for ks in self._agg_columns],
-                              column_scols=[scol_for(sdf, ks._internal.data_columns[0])
-                                            for ks in self._agg_columns])
+                              column_index=[kser._internal.column_index[0]
+                                            for kser in self._agg_columns],
+                              column_scols=[scol_for(sdf, kser._internal.data_columns[0])
+                                            for kser in self._agg_columns])
         return DataFrame(internal)
 
     # TODO: add axis parameter
@@ -1213,13 +1213,13 @@ def idxmin(self, skipna=True):
         index = self._kdf._internal.index_columns[0]
 
         stat_exprs = []
-        for ks in self._agg_columns:
-            name = ks._internal.data_columns[0]
+        for kser in self._agg_columns:
+            name = kser._internal.data_columns[0]
 
             if skipna:
-                order_column = Column(ks._scol._jc.asc_nulls_last())
+                order_column = Column(kser._scol._jc.asc_nulls_last())
             else:
-                order_column = Column(ks._scol._jc.asc_nulls_first())
+                order_column = Column(kser._scol._jc.asc_nulls_first())
             window = Window.partitionBy(groupkey_cols).orderBy(order_column)
             sdf = sdf.withColumn(name,
                                  F.when(F.row_number().over(window) == 1, scol_for(sdf, index))
@@ -1230,10 +1230,10 @@ def idxmin(self, skipna=True):
                               index_map=[(SPARK_INDEX_NAME_FORMAT(i),
                                           s._internal.column_index[0])
                                          for i, s in enumerate(groupkeys)],
-                              column_index=[ks._internal.column_index[0]
-                                            for ks in self._agg_columns],
-                              column_scols=[scol_for(sdf, ks._internal.data_columns[0])
-                                            for ks in self._agg_columns])
+                              column_index=[kser._internal.column_index[0]
+                                            for kser in self._agg_columns],
+                              column_scols=[scol_for(sdf, kser._internal.data_columns[0])
+                                            for kser in self._agg_columns])
         return DataFrame(internal)
 
     def fillna(self, value=None, method=None, axis=None, inplace=False, limit=None):
@@ -1713,20 +1713,20 @@ def _reduce_for_stat_function(self, sfun, only_numeric):
         column_index = []
         if len(self._agg_columns) > 0:
             stat_exprs = []
-            for ks in self._agg_columns:
-                spark_type = ks.spark_type
-                name = ks._internal.data_columns[0]
-                idx = ks._internal.column_index[0]
+            for kser in self._agg_columns:
+                spark_type = kser.spark_type
+                name = kser._internal.data_columns[0]
+                idx = kser._internal.column_index[0]
                 # TODO: we should have a function that takes dataframes and converts the numeric
                 # types. Converting the NaNs is used in a few places, it should be in utils.
                 # Special handle floating point types because Spark's count treats nan as a valid
                 # value, whereas Pandas count doesn't include nan.
                 if isinstance(spark_type, DoubleType) or isinstance(spark_type, FloatType):
-                    stat_exprs.append(sfun(F.nanvl(ks._scol, F.lit(None))).alias(name))
+                    stat_exprs.append(sfun(F.nanvl(kser._scol, F.lit(None))).alias(name))
                     data_columns.append(name)
                     column_index.append(idx)
                 elif isinstance(spark_type, NumericType) or not only_numeric:
-                    stat_exprs.append(sfun(ks._scol).alias(name))
+                    stat_exprs.append(sfun(kser._scol).alias(name))
                     data_columns.append(name)
                     column_index.append(idx)
             sdf = sdf.groupby(*groupkey_cols).agg(*stat_exprs)
@@ -1875,8 +1875,8 @@ def _shift(self, periods, fill_value):
 
 class SeriesGroupBy(GroupBy):
 
-    def __init__(self, ks: Series, by: List[Series], as_index: bool = True):
-        self._ks = ks
+    def __init__(self, kser: Series, by: List[Series], as_index: bool = True):
+        self._kser = kser
         self._groupkeys = by
         if not as_index:
             raise TypeError('as_index=False only valid with DataFrame')
@@ -1894,31 +1894,31 @@ def __getattr__(self, item: str) -> Any:
 
     def _diff(self, *args, **kwargs):
         groupkey_scols = [s._scol for s in self._groupkeys]
-        return Series._diff(self._ks, *args, **kwargs, part_cols=groupkey_scols)
+        return Series._diff(self._kser, *args, **kwargs, part_cols=groupkey_scols)
 
     def _cum(self, func):
         groupkey_scols = [s._scol for s in self._groupkeys]
-        return Series._cum(self._ks, func, True, part_cols=groupkey_scols)
+        return Series._cum(self._kser, func, True, part_cols=groupkey_scols)
 
     def _rank(self, *args, **kwargs):
         groupkey_scols = [s._scol for s in self._groupkeys]
-        return Series._rank(self._ks, *args, **kwargs, part_cols=groupkey_scols)
+        return Series._rank(self._kser, *args, **kwargs, part_cols=groupkey_scols)
 
     def _fillna(self, *args, **kwargs):
         groupkey_scols = [s._scol for s in self._groupkeys]
-        return Series._fillna(self._ks, *args, **kwargs, part_cols=groupkey_scols)
+        return Series._fillna(self._kser, *args, **kwargs, part_cols=groupkey_scols)
 
     def _shift(self, periods, fill_value):
         groupkey_scols = [s._scol for s in self._groupkeys]
-        return Series._shift(self._ks, periods, fill_value, part_cols=groupkey_scols)
+        return Series._shift(self._kser, periods, fill_value, part_cols=groupkey_scols)
 
     @property
     def _kdf(self) -> DataFrame:
-        return self._ks._kdf
+        return self._kser._kdf
 
     @property
     def _agg_columns(self):
-        return [self._ks]
+        return [self._kser]
 
     def _reduce_for_stat_function(self, sfun, only_numeric):
         return _col(super(SeriesGroupBy, self)._reduce_for_stat_function(sfun, only_numeric))
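A quick sketch of the grouped idxmax/idxmin path renamed above, assuming the standard `ks` module alias (illustration only; output formatting may vary by version):

    import databricks.koalas as ks

    kdf = ks.DataFrame({'key': ['a', 'a', 'b'], 'val': [1.0, 3.0, 2.0]},
                       index=[10, 20, 30])
    print(kdf.groupby('key').idxmax().to_pandas())  # index label of each group's max
    print(kdf.groupby('key').idxmin().to_pandas())  # index label of each group's min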
52 changes: 26 additions & 26 deletions databricks/koalas/indexing.py
@@ -122,34 +122,34 @@ class AtIndexer(object):
     >>> kdf.at[5, 'B']
     array([ 4, 20])
     """
-    def __init__(self, df_or_s):
+    def __init__(self, kdf_or_kser):
         from databricks.koalas.frame import DataFrame
         from databricks.koalas.series import Series
-        assert isinstance(df_or_s, (DataFrame, Series)), \
-            'unexpected argument type: {}'.format(type(df_or_s))
-        self._df_or_s = df_or_s
+        assert isinstance(kdf_or_kser, (DataFrame, Series)), \
+            'unexpected argument type: {}'.format(type(kdf_or_kser))
+        self._kdf_or_kser = kdf_or_kser
 
     @property
     def _is_df(self):
         from databricks.koalas.frame import DataFrame
-        return isinstance(self._df_or_s, DataFrame)
+        return isinstance(self._kdf_or_kser, DataFrame)
 
     @property
     def _is_series(self):
         from databricks.koalas.series import Series
-        return isinstance(self._df_or_s, Series)
+        return isinstance(self._kdf_or_kser, Series)
 
     @property
     def _internal(self):
-        return self._df_or_s._internal
+        return self._kdf_or_kser._internal
 
     def __getitem__(self, key):
         if self._is_df:
             if not isinstance(key, tuple) or len(key) != 2:
                 raise TypeError("Use DataFrame.at like .at[row_index, column_name]")
             row_sel, col_sel = key
         else:
-            assert self._is_series, type(self._df_or_s)
+            assert self._is_series, type(self._kdf_or_kser)
             if not isinstance(key, str) and len(key) != 1:
                 raise TypeError("Use Series.at like .at[row_index]")
             row_sel = key
@@ -357,19 +357,19 @@ class LocIndexer(object):
     9  7  8
     """
 
-    def __init__(self, df_or_s):
+    def __init__(self, kdf_or_kser):
         from databricks.koalas.frame import DataFrame
         from databricks.koalas.series import Series
-        assert isinstance(df_or_s, (DataFrame, Series)), \
-            'unexpected argument type: {}'.format(type(df_or_s))
-        if isinstance(df_or_s, DataFrame):
-            self._kdf = df_or_s
-            self._ks = None
+        assert isinstance(kdf_or_kser, (DataFrame, Series)), \
+            'unexpected argument type: {}'.format(type(kdf_or_kser))
+        if isinstance(kdf_or_kser, DataFrame):
+            self._kdf = kdf_or_kser
+            self._kser = None
         else:
             # If df_or_col is Column, store both the DataFrame anchored to the Column and
             # the Column itself.
-            self._kdf = df_or_s._kdf
-            self._ks = df_or_s
+            self._kdf = kdf_or_kser._kdf
+            self._kser = kdf_or_kser
 
     def __getitem__(self, key):
         from databricks.koalas.frame import DataFrame
@@ -381,7 +381,7 @@ def raiseNotImplemented(description):
                 pandas_function=".loc[..., ...]",
                 spark_target_function="select, where")
 
-        rows_sel, cols_sel = _unfold(key, self._ks)
+        rows_sel, cols_sel = _unfold(key, self._kser)
 
         sdf = self._kdf._sdf
         if isinstance(rows_sel, Series):
@@ -665,19 +665,19 @@ class ILocIndexer(object):
     2  1000  3000
     """
 
-    def __init__(self, df_or_s):
+    def __init__(self, kdf_or_kser):
         from databricks.koalas.frame import DataFrame
         from databricks.koalas.series import Series
-        assert isinstance(df_or_s, (DataFrame, Series)), \
-            'unexpected argument type: {}'.format(type(df_or_s))
-        if isinstance(df_or_s, DataFrame):
-            self._kdf = df_or_s
-            self._ks = None
+        assert isinstance(kdf_or_kser, (DataFrame, Series)), \
+            'unexpected argument type: {}'.format(type(kdf_or_kser))
+        if isinstance(kdf_or_kser, DataFrame):
+            self._kdf = kdf_or_kser
+            self._kser = None
         else:
             # If df_or_col is Column, store both the DataFrame anchored to the Column and
             # the Column itself.
-            self._kdf = df_or_s._kdf
-            self._ks = df_or_s
+            self._kdf = kdf_or_kser._kdf
+            self._kser = kdf_or_kser
 
     def __getitem__(self, key):
         from databricks.koalas.frame import DataFrame
@@ -690,7 +690,7 @@ def raiseNotImplemented(description):
                 pandas_function=".iloc[..., ...]",
                 spark_target_function="select, where")
 
-        rows_sel, cols_sel = _unfold(key, self._ks)
+        rows_sel, cols_sel = _unfold(key, self._kser)
 
         sdf = self._kdf._sdf
         if isinstance(rows_sel, Index):
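The indexers above anchor either a DataFrame or a Series, now stored as `_kdf_or_kser` (or split into `_kdf`/`_kser`). A small sketch of the two entry points they distinguish, mirroring the `.at` docstring in the diff:

    import databricks.koalas as ks

    kdf = ks.DataFrame({'A': [0, 1], 'B': [2, 3]}, index=[5, 6])
    print(kdf.at[5, 'A'])    # DataFrame path: key must be (row_index, column_name)
    print(kdf['A'].at[5])    # Series path: key is the row label alone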
14 changes: 7 additions & 7 deletions databricks/koalas/tests/test_dataframe.py
@@ -854,7 +854,7 @@ def test_merge(self):
 
         left_kdf = ks.from_pandas(left_pdf)
         right_kdf = ks.from_pandas(right_pdf)
-        right_ks = ks.from_pandas(right_ps)
+        right_kser = ks.from_pandas(right_ps)
 
         def check(op, right_kdf=right_kdf, right_pdf=right_pdf):
             k_res = op(left_kdf, right_kdf)
@@ -900,22 +900,22 @@ def check(op, right_kdf=right_kdf, right_pdf=right_pdf):
         # Test Series on the right
         # pd.DataFrame.merge with Series is implemented since version 0.24.0
         if LooseVersion(pd.__version__) >= LooseVersion("0.24.0"):
-            check(lambda left, right: left.merge(right), right_ks, right_ps)
+            check(lambda left, right: left.merge(right), right_kser, right_ps)
             check(lambda left, right: left.merge(right, left_on='x', right_on='x'),
-                  right_ks, right_ps)
+                  right_kser, right_ps)
             check(lambda left, right: left.set_index('x').merge(right, left_index=True,
-                                                                right_on='x'), right_ks, right_ps)
+                                                                right_on='x'), right_kser, right_ps)
 
             # Test join types with Series
             for how in ['inner', 'left', 'right', 'outer']:
-                check(lambda left, right: left.merge(right, how=how), right_ks, right_ps)
+                check(lambda left, right: left.merge(right, how=how), right_kser, right_ps)
                 check(lambda left, right: left.merge(right, left_on='x', right_on='x', how=how),
-                      right_ks, right_ps)
+                      right_kser, right_ps)
 
             # suffix with Series
             check(lambda left, right: left.merge(right, suffixes=['_left', '_right'], how='outer',
                                                  left_index=True, right_index=True),
-                  right_ks, right_ps)
+                  right_kser, right_ps)
 
         # multi-index columns
         left_columns = pd.MultiIndex.from_tuples([('a', 'lkey'), ('a', 'value'), ('b', 'x')])
4 changes: 2 additions & 2 deletions databricks/koalas/tests/test_frame_plot.py
@@ -422,14 +422,14 @@ def check_kde_plot(pdf, kdf, *args, **kwargs):
         check_kde_plot(pdf1, kdf1, ind=[1, 2, 3], bw_method=3.0)
 
     def test_missing(self):
-        ks = self.kdf1
+        kser = self.kdf1
 
         unsupported_functions = ['box', 'hexbin']
 
         for name in unsupported_functions:
             with self.assertRaisesRegex(PandasNotImplementedError,
                                         "method.*DataFrame.*{}.*not implemented".format(name)):
-                getattr(ks.plot, name)()
+                getattr(kser.plot, name)()
 
     def test_topn_max_rows(self):
 
4 changes: 2 additions & 2 deletions databricks/koalas/tests/test_reshape.py
@@ -35,9 +35,9 @@ def test_get_dummies(self):
                           pd.DataFrame({'a': [1, 2, 3, 4, 4, 3, 2, 1],
                                         # 'b': pd.Categorical(list('abcdabcd')),
                                         'b': list('abcdabcd')})]:
-            kdf_or_ks = ks.from_pandas(pdf_or_ps)
+            kdf_or_kser = ks.from_pandas(pdf_or_ps)
 
-            self.assert_eq(ks.get_dummies(kdf_or_ks), pd.get_dummies(pdf_or_ps), almost=True)
+            self.assert_eq(ks.get_dummies(kdf_or_kser), pd.get_dummies(pdf_or_ps), almost=True)
 
     def test_get_dummies_object(self):
         pdf = pd.DataFrame({'a': [1, 2, 3, 4, 4, 3, 2, 1],
4 changes: 2 additions & 2 deletions databricks/koalas/window.py
@@ -635,7 +635,7 @@ def __init__(self, groupby, groupkeys, window, min_periods=None):
         from databricks.koalas.groupby import DataFrameGroupBy
 
         if isinstance(groupby, SeriesGroupBy):
-            kdf = groupby._ks.to_frame()
+            kdf = groupby._kser.to_frame()
         elif isinstance(groupby, DataFrameGroupBy):
             kdf = groupby._kdf
         else:
@@ -1412,7 +1412,7 @@ def __init__(self, groupby, groupkeys, min_periods=1):
         from databricks.koalas.groupby import DataFrameGroupBy
 
         if isinstance(groupby, SeriesGroupBy):
-            kdf = groupby._ks.to_frame()
+            kdf = groupby._kser.to_frame()
         elif isinstance(groupby, DataFrameGroupBy):
            kdf = groupby._kdf
         else:
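A sketch of the grouped-window construction renamed above: building a rolling or expanding aggregation from a SeriesGroupBy goes through `groupby._kser.to_frame()` (assuming groupby rolling/expanding support is wired up at this commit):

    import databricks.koalas as ks

    kdf = ks.DataFrame({'g': ['a', 'a', 'a', 'b', 'b'],
                        'v': [1.0, 2.0, 3.0, 4.0, 5.0]})
    print(kdf.groupby('g')['v'].rolling(2).sum().to_pandas())
    print(kdf.groupby('g')['v'].expanding().sum().to_pandas())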
