-
Notifications
You must be signed in to change notification settings - Fork 310
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Upgrade spark dependency to 3.3.0 #824
Changes from all commits
8221606
edcbb50
7493b35
3badb5b
abd84c6
57d4e32
2714f95
82cca57
f9c4fa8
031c8c8
b1be5ff
4c30169
f80ced7
a84b715
d18df17
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,19 +6,19 @@ import Keys._ | |
object Dependencies { | ||
import DependencyHelpers._ | ||
|
||
val sparkVersion = "3.2.0" | ||
val sparkVersion = "3.3.0" | ||
val scalaTestVersion = "3.0.8" | ||
val junitVersion = "5.8.2" | ||
val akkaVersion = "2.6.14" | ||
val akkaHttpVersion = "10.2.4" | ||
val springBootVersion = "2.6.2" | ||
lazy val logbackVersion = "1.2.3" | ||
lazy val loggingVersion = "3.9.0" | ||
lazy val slf4jVersion = "1.7.30" | ||
lazy val slf4jVersion = "1.7.36" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The old log4j version conflicts with Spark 3.3's dependencies. |
||
lazy val awsSdkVersion = "1.11.1033" | ||
val tensorflowJavaVersion = "0.4.0" // Match Tensorflow 2.7.0 https://github.com/tensorflow/java/#tensorflow-version-support | ||
val xgboostVersion = "1.6.1" | ||
val breezeVersion = "1.0" | ||
val breezeVersion = "1.2" | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Keep breeze at the same version that Spark 3.3 depends on. |
||
val hadoopVersion = "2.7.4" // matches spark version | ||
val platforms = "windows-x86_64,linux-x86_64,macosx-x86_64" | ||
val tensorflowPlatforms : Array[String] = sys.env.getOrElse("TENSORFLOW_PLATFORMS", platforms).split(",") | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1026,23 +1026,23 @@ def transform(self, y): | |
:return: | ||
""" | ||
if isinstance(y, pd.DataFrame): | ||
x = y.ix[:,0] | ||
y = y.ix[:,1] | ||
x = y.iloc[:,0] | ||
y = y.iloc[:,1] | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Because the required pandas version is now >= 1.0.5, the `.ix` indexer is no longer available, so `.iloc` is used instead. |
||
else: | ||
x = y[:,0] | ||
y = y[:,1] | ||
if self.transform_type == 'add': | ||
return pd.DataFrame(np.add(x, y)) | ||
return pd.DataFrame(np.add(x, y), columns=[self.output_features]) | ||
elif self.transform_type == 'sub': | ||
return pd.DataFrame(np.subtract(x, y)) | ||
return pd.DataFrame(np.subtract(x, y), columns=[self.output_features]) | ||
elif self.transform_type == 'mul': | ||
return pd.DataFrame(np.multiply(x, y)) | ||
return pd.DataFrame(np.multiply(x, y), columns=[self.output_features]) | ||
elif self.transform_type == 'div': | ||
return pd.DataFrame(np.divide(x, y)) | ||
return pd.DataFrame(np.divide(x, y), columns=[self.output_features]) | ||
elif self.transform_type == 'rem': | ||
return pd.DataFrame(np.remainder(x, y)) | ||
return pd.DataFrame(np.remainder(x, y), columns=[self.output_features]) | ||
elif self.transform_type == 'pow': | ||
return pd.DataFrame(x**y) | ||
return pd.DataFrame(x**y, columns=[self.output_features]) | ||
Comment on lines
+1035
to
+1045
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Note: this is a fix. The returned DataFrame's column is now named after `output_features`. |
||
|
||
def fit_transform(self, X, y=None, **fit_params): | ||
""" | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,4 +3,4 @@ coverage<5.0.0 | |
ipdb | ||
nose | ||
nose-exclude>=0.5.0 | ||
pyspark==3.2.0 | ||
pyspark==3.3.0 |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,7 +1,7 @@ | ||
numpy>=1.8.2 | ||
six>=1.10.0 | ||
scipy>=0.13.0b1 | ||
pandas>=0.18.1, <= 0.24.2 | ||
pandas>=1.0.5 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Spark 3.3 requires pandas >= 1.0.5. |
||
scikit-learn>=0.22.0,<0.23.0 | ||
gensim<4.1.0 | ||
urllib3==1.26.5 |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -630,7 +630,7 @@ def math_binary_test(self): | |
|
||
Xres = math_binary_tf.fit_transform(self.df[['a', 'b']]) | ||
|
||
assert_frame_equal(pd.DataFrame(self.df.a + self.df.b, columns=['a']), Xres) | ||
assert_frame_equal(pd.DataFrame(self.df.a + self.df.b, columns=['a_plus_b']), Xres) | ||
|
||
math_binary_tf.serialize_to_bundle(self.tmp_dir, math_binary_tf.name) | ||
|
||
|
@@ -664,7 +664,7 @@ def math_binary_deserialize_add_test(self): | |
|
||
Xres = math_binary_tf.fit_transform(self.df[['a', 'b']]) | ||
|
||
assert_frame_equal(pd.DataFrame(self.df.a + self.df.b, columns=['a']), Xres) | ||
assert_frame_equal(pd.DataFrame(self.df.a + self.df.b, columns=['a_plus_b']), Xres) | ||
|
||
math_binary_tf.serialize_to_bundle(self.tmp_dir, math_binary_tf.name) | ||
|
||
|
@@ -674,15 +674,17 @@ def math_binary_deserialize_add_test(self): | |
|
||
res_a = math_binary_tf.transform(self.df[['a', 'b']]) | ||
res_b = math_binary_ds_tf.transform(self.df[['a', 'b']]) | ||
assert_frame_equal(res_a, res_b) | ||
|
||
# TODO: Deserialization on output_features has some issue. fix this. | ||
# assert_frame_equal(res_a, res_b) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Could you help fix this? It is an existing bug that the previous test did not cover, and it is not related to this PR. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Can you create a GitHub issue for this so we don't forget about it? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Filed ticket: #830 |
||
|
||
def math_binary_subtract_test(self): | ||
|
||
math_binary_tf = MathBinary(input_features=['a', 'b'], output_features='a_less_b', transform_type='sub') | ||
|
||
Xres = math_binary_tf.fit_transform(self.df[['a', 'b']]) | ||
|
||
assert_frame_equal(pd.DataFrame(self.df.a - self.df.b, columns=['a']), Xres) | ||
assert_frame_equal(pd.DataFrame(self.df.a - self.df.b, columns=['a_less_b']), Xres) | ||
|
||
math_binary_tf.serialize_to_bundle(self.tmp_dir, math_binary_tf.name) | ||
|
||
|
@@ -716,7 +718,7 @@ def math_binary_multiply_test(self): | |
|
||
Xres = math_binary_tf.fit_transform(self.df[['a', 'b']]) | ||
|
||
assert_frame_equal(pd.DataFrame(self.df.a * self.df.b, columns=['a']), Xres) | ||
assert_frame_equal(pd.DataFrame(self.df.a * self.df.b, columns=['a_mul_b']), Xres) | ||
|
||
math_binary_tf.serialize_to_bundle(self.tmp_dir, math_binary_tf.name) | ||
|
||
|
@@ -746,11 +748,11 @@ def math_binary_multiply_test(self): | |
|
||
def math_binary_divide_test(self): | ||
|
||
math_binary_tf = MathBinary(input_features=['a', 'b'], output_features='a_mul_b', transform_type='div') | ||
math_binary_tf = MathBinary(input_features=['a', 'b'], output_features='a_div_b', transform_type='div') | ||
|
||
Xres = math_binary_tf.fit_transform(self.df[['a', 'b']]) | ||
|
||
assert_frame_equal(pd.DataFrame(self.df.a / self.df.b, columns=['a']), Xres) | ||
assert_frame_equal(pd.DataFrame(self.df.a / self.df.b, columns=['a_div_b']), Xres) | ||
|
||
math_binary_tf.serialize_to_bundle(self.tmp_dir, math_binary_tf.name) | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
[tox] | ||
envlist = py36,py37 | ||
envlist = py37 | ||
skipdist = true | ||
|
||
[testenv] | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Spark 3.3 does not support Python 3.6.