In [28]:
import pandas as pd
from sklearn.model_selection import train_test_split
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import requests
import json 
import os

In [50]:
"""

Extract data from the CSV files in the "Prices" folder and combine them into a single pandas DataFrame with date, closing price, ticker, 1-month price change (%), 3-month price change (%), and the Result label (1, 0, or -1) derived from those two changes.

"""

# Folder holding the price CSVs. Each file name (minus ".csv") is used as the
# company ticker, and each file must provide a 'Close' column.
folder_path = "Prices"

# Look-ahead horizons expressed in ROWS, not calendar time: 20 rows stand in
# for one month and 60 rows for three months.
# NOTE(review): this assumes every CSV holds one row per trading day sorted by
# ascending date -- confirm against the files.
ONE_MONTH_ROWS = 20
THREE_MONTH_ROWS = 60

# Percentage moves required (in the same direction, on both horizons) before a
# row is labelled as a rise (+1) or a fall (-1).
one_month_threshold = 5
three_month_threshold = 10


def _add_forward_changes(prices, ticker):
    """Return a copy of ``prices`` with Ticker, forward changes and Result added.

    Parameters
    ----------
    prices : pandas.DataFrame
        Price history of a single company; must contain a 'Close' column.
    ticker : str
        Value written to the 'Ticker' column of every row.

    Returns
    -------
    pandas.DataFrame
        Copy of ``prices`` with four extra columns:
        'Ticker', 'OneMonthChange' and 'ThreeMonthChange' (percentage change of
        'Close' between a row and the row 20 / 60 positions later), and
        'Result' (1, -1 or 0).
    """
    labelled = prices.copy()
    labelled['Ticker'] = ticker

    close = labelled['Close']
    # shift(-k) pulls the close from k rows ahead onto the current row. The
    # last k rows have no such future row, so their change is NaN; no extra
    # mask is needed to blank them out.
    labelled['OneMonthChange'] = (close.shift(-ONE_MONTH_ROWS) - close) / close * 100
    labelled['ThreeMonthChange'] = (close.shift(-THREE_MONTH_ROWS) - close) / close * 100

    # Result is the y column: +1 when both horizons rise past their thresholds,
    # -1 when both fall past them, otherwise 0. Comparisons against NaN are
    # False, so rows without a known future price keep the default 0.
    rising = (
        (labelled['OneMonthChange'] > one_month_threshold)
        & (labelled['ThreeMonthChange'] > three_month_threshold)
    )
    falling = (
        (labelled['OneMonthChange'] < -one_month_threshold)
        & (labelled['ThreeMonthChange'] < -three_month_threshold)
    )
    labelled['Result'] = 0
    labelled.loc[rising, 'Result'] = 1
    labelled.loc[falling, 'Result'] = -1
    return labelled


# One labelled DataFrame per CSV file.
dataframes = []

# sorted() makes the row order of the combined frame reproducible;
# os.listdir() returns entries in arbitrary order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith(".csv"):
        continue  # ignore anything that is not a CSV

    file_path = os.path.join(folder_path, filename)
    filename_wo_extension = os.path.splitext(filename)[0]  # the ticker

    df = _add_forward_changes(pd.read_csv(file_path), filename_wo_extension)
    dataframes.append(df)

if not dataframes:
    # pd.concat([]) would raise a less helpful "No objects to concatenate".
    raise ValueError(f"No .csv files found in folder {folder_path!r}")

# Stack every company's rows into a single DataFrame with a fresh 0..n-1 index.
prices_df = pd.concat(dataframes, ignore_index=True)

# Be careful: only the columns listed here survive. A feature added above will
# not show up in prices_df unless it is also added to this list.
prices_df = prices_df[["Date", "Close", "Ticker", "OneMonthChange", "ThreeMonthChange", "Result"]]

# prices_df now has all the CSV files' contents in a single DataFrame
print(prices_df)

                            Date       Close Ticker  OneMonthChange  \
0      1999-11-18 00:00:00-05:00   26.652401      A        4.403417   
1      1999-11-19 00:00:00-05:00   24.456602      A       16.099074   
2      1999-11-22 00:00:00-05:00   26.652401      A        5.965871   
3      1999-11-23 00:00:00-05:00   24.229450      A       18.906221   
4      1999-11-24 00:00:00-05:00   24.873049      A       21.156744   
...                          ...         ...    ...             ...   
92411  2024-04-25 00:00:00-04:00  246.339996    ADP             NaN   
92412  2024-04-26 00:00:00-04:00  243.070007    ADP             NaN   
92413  2024-04-29 00:00:00-04:00  243.949997    ADP             NaN   
92414  2024-04-30 00:00:00-04:00  241.889999    ADP             NaN   
92415  2024-05-01 00:00:00-04:00  247.330002    ADP             NaN   

       ThreeMonthChange  Result  
0             86.079527       0  
1            100.619218       1  
2            120.454482       1  
3          

TODO: merge the price data (`prices_df`) with the earnings-call transcript data here, before building the model inputs.

In [51]:
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments
from sklearn.model_selection import train_test_split

# Placeholder texts used to test the pipeline until real transcripts are available.
texts = ["good test", "bad test"]

# Binary label for the classifier: 1 where Result is 1, 0 where Result is 0 or -1.
# The mask has to be applied to prices_df itself. Applying it to `df` (the loop
# variable left over from the price-loading cell, i.e. only the last CSV read)
# would leave every Label in prices_df at 1.
prices_df["Label"] = 1
prices_df.loc[prices_df["Result"] < 1, "Label"] = 0

# TODO: split the data into training and validation sets.

# Checkpoint name shared by the tokenizer and the model so the two stay in sync.
PRETRAINED_CHECKPOINT = 'bert-base-uncased'

# Pre-trained BERT tokenizer for that checkpoint.
tokenizer = BertTokenizer.from_pretrained(PRETRAINED_CHECKPOINT)

# TODO: tokenize the texts.

# TODO: convert the labels to tensors.

# Pre-trained BERT with a sequence-classification head.
# Two labels for now: binary classification is assumed.
model = BertForSequenceClassification.from_pretrained(
    PRETRAINED_CHECKPOINT,
    num_labels=2,
)

# TODO: define the training arguments.

# TODO: define the trainer.

# TODO: fine-tune BERT on our data.
#trainer.train()

# TODO: evaluate the fine-tuned model.
#trainer.evaluate()


Some weights of BertForSequenceClassification were not initialized from the model checkpoint at bert-base-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [32]:
from itertools import combinations
from sklearn.linear_model import LogisticRegression

#LR = LogisticRegression(max_iter=1000)

#Below is extremely straightforward logistic regression code to train the model on the quantitative outputs BERT pumps out
#LR.fit(X_train[cols], y_train)
#print(LR.score(X_train[cols], y_train))

In [15]:
import json
import re

# Messy text
messy_text = """
Douglas Anmuth -- JP Morgan -- Analyst

Thanks for taking the question. Ruth, just first on the accounting change. I was just hoping you could clarify. We're trying to normalize that. Is it right that we'd adding back about $632 million to operating income and then reducing EPS by $340 million? And then just on the EPS side, perhaps adjusting for the tax rate. And then just in terms of the business, I just wanted to ask Waymo. If you could talk a little bit about just the latest timing for the commercial launch in Phoenix, and how quickly you'd look to expand to other markets. And then just how you're thinking about the technology and whether you'll license it to others going forward or keep it more proprietary for Waymo services. Thanks.

Ruth Porat -- Chief Financial Officer

Sure. So on the accounting standard, we tried to lay out all the component parts clearly on the cover of the earnings release so that you would have it all in one place. I think you summarized it right, but I'll just direct everybody to the earnings release. You know, the net of which was the gain from the equity investments was $2.4 billion to net income. That is net of performance fees, as well as the release of a deferred tax asset that we have. So it does reflect $3 billion in gains.

And I think you know this, but this quarter, the accounting standard requires marks for everything where there is an observable raise, so these are unrealized, the majority of them are unrealized, not actually monetized by Alphabet. Then the performance fees are calculated based on investment returns. They're accrued but not paid until an exit event occurs and they do appear in OpEx. As you noted, there's also therefore the benefit that flows through on the tax line. That is 5 percentage points of benefit offset to the effective tax rate for the quarter.

As it relates to your Waymo question. There was a lot in there. We do remain very excited about the opportunity with Waymo and our continued progress on multiple fronts. It is still very early in terms of our progress. This year is about offering a service that is safe, that works, that delights users in the Phoenix area. The rider program in Phoenix is open to members of the public and riders will use a Waymo app to hail one of our fully self-driving cars without a driver at the wheel and will pay for the service.

We've also had progress on the vehicle partnerships. As I mentioned in my opening comments, last month Waymo announced it signed a long-term strategic partnership with Jaguar, beginning with a collaboration to design and manufacture self-driving I-PACE vehicles for Waymo's transportation service. These are all electric cars. This new partnership in the vehicles adds to our strong position with FCA, and the production of the cars begins in 2020. And then we are expanding our testing to more states. We're also working on additional areas, like applying the technology to logistics and deliveries and working with cities to help strengthen public transportation and for personal use vehicles.

As we've talked about on a bunch of calls, the opportunity is here for us because we started with safety and we remain a leader in safety, and we do believe that's the foundation for success and it builds on the all the test miles that we've done. So we keep coming back to when you create vehicles that drive themselves safely, we think there's a lot of potential uses and business opportunities and that's what we're focused on.

Douglas Anmuth -- JP Morgan -- Analyst

Thank you, Ruth.

Operator

Thank you. Our next question comes from Heather Bellini of Goldman Sachs. Your line is now open.

Heather Bellini -- Goldman Sachs -- Analyst

Great, thank you. I wanted to ask two quick questions. One, just one on GDPR, and then one on Cloud. On GDPR, I was just wondering if you could share with us kind of any impact you're thinking about if the implementation occurs later in May. And so, any thoughts you could you could share there would be great. And then on Cloud, Sundar, you had mentioned you're seeing a lot of momentum. You said G Suite I believe accelerated in Q1. I was wondering if there was any color on the GCP side that you could share from a growth perspective if that business accelerated or not, and how were the deal sizes trending for that business in particular.

Thank you.

Sundar Pichai -- Chief Executive Officer

Great. Maybe I'll do the GDPR first. GDPR, I realize, is a fairly new public topic, but for us it's not new. We started working on GDPR compliance over 18 months ago, and have been very engaged on it. It's really important and we care about getting it right. Overall, we have long had a very robust and strong privacy program at Google, too.

So, we are committed to meeting requirements on May 25th, and also the long-term. We are working very closely with advertisers, publishers, and our partners. We'll also update all the privacy policies and controls we provide to users worldwide. So it's a big effort. We are very committed to it. We are very focused on getting it right by our users and partners. That's where our focus is now.

Heather, on Cloud, I guess your question was about overall growth. The momentum has been very strong on Cloud as well. We hadn't talked about G Suite much, and so we highlighted the momentum there, but Cloud is continuing its great growth. We are seeing it across the board. Things worth I would call out is we are seeing larger deals as well. We're seeing good synergies between G Suite and Cloud. Areas where we have done acquisitions like Apogee. They are beginning to work in terms of driving synergies to Cloud.

The efforts we're beginning to put together with our partners. That is beginning to bear fruit as well. So, we have go-to-market programs now with SAP, CISCO, and Salesforce. I think we are beginning to see early results from that and hopefully that translates into more momentum going forward.

Heather Bellini -- Goldman Sachs -- Analyst

Thank you.

Operator

Thank you. Our next question comes from Eric Sheridan of UBS. Your line is now open.

Eric J. Sheridan -- UBS Securities LLC -- Analyst

Thank you very much for taking the question. Maybe two for Sundar, if I can. One, on mobile search, you continue to call that out as a point of strength in the results. What are you most excited about in terms of either product innovation or the ability to get consumers to adopt mobile search more broadly on devices globally, which could lead to more ad budgets moving into mobile search? And then on hardware, you've now been through two years of sort of Pixel devices. You've made the Aqua hire of VHCC engineers. Can you give us a sense of what you've learned so far from your hardware efforts and how that might evolve product innovation or go to market strategies long term? Thanks so much.

Sundar Pichai -- Chief Executive Officer

Good. On mobile search for me, mobile obviously raises the bar. If you look at the evolution of search, we evolved to stay ahead of user expectations and we evolved from just providing links to answers. I just feel at a high level, the next big evolution we are doing as part of mobile search and Assistant is to actually help users complete actions; to help get things done. And you know, that's really hard to do at scale and that's the work we are doing. As we do that, it'll impact just not just Assistant, but mobile search more broadly. Obviously, there is a commercial impact as well. So we continue to be very excited about the opportunities there.

On Hardware, the exciting part for us is now I think you know we have all the end-to-end capabilities of a world-class hardware organization, along with the quality of the software organizations we've always had. In this area, it truly takes long-term planning. And so, for example, if you think about Silicon, etc., the longer you can do it, the more advantages you have. I definitely feel we are taking the steps toward being able to do this well for the long-term. Part of that obviously involves scaling up our go-to-market strategies both in the U.S. and internationally so that we can drive adoption.

I said earlier our net promoter scores show that we are right up there with the best-in-class devices and across all the products we have, not just our Pixel. Across our Nest family and everything we do. So, the opportunity is clearly there. We are going to lean into it. It takes two to three years to really get to the scale where we want to see it, but we are committed to getting there.

Eric J. Sheridan -- UBS Securities LLC -- Analyst

Thanks so much.

Operator

Thank you. Our next question comes from Mark Mahaney of RBC Capital Markets. Your line is now open.

Mark Mahaney -- RBC Capital Markets -- Analyst

Great, thanks. I want to follow up on Heather's question on GDPR. And the question I want to ask is, I understand that you've been working for a long time to make sure that you're compliant, but do you think that GDPR or other regulation that you see on the horizon is likely to impact materially the targeting capabilities that advertisers have on Google? Is there something in the regulation that's going to make Google and its properties less attractive to advertisers? That's the action question I want to ask. Thank you.

Sundar Pichai -- Chief Executive Officer

Thanks, Mark. Above everything else, as we're working through GDPR, we are making sure we are focused on getting that user experience right for our users and our partners. But to clarify your question further, first of all, it's important to understand that most of our ad business is search, rely on very limited information. Essentially, [inaudible] [01:02:19] the key words to show a relevant ad or product. We've been preparing this for 18 months, and I think we are focused on getting the compliance right. It will be a years' long effort and we are helping not just us, but our publishers and partners. But overall, we think we'll be able to do all that with a positive impact for users, and publishers, and advertisers, and so our business.

Mark Mahaney -- RBC Capital Markets -- Analyst

Okay, thank you, Sundar.

Operator

Thank you. Our next question comes from Brian Nowak of Morgan Stanley. Your line is now open.

Brian Nowak -- Morgan Stanley & Co. LLC -- Analyst

Thanks for taking my questions. I have two. The first one on desktop search. It's always nice to hear that your oldest business is still growing. Just curious. Could you give one or two tangible examples or products that are still driving the desktop search growth? And Sundar, I understand you're always focused on user experience. At a high level, what do you see as the biggest areas for potential further improvement in desktop search? And let me ask you the same question about YouTube. What are sort of the biggest areas of tension that you're focused on improving from a user perspective on YouTube right now?

Sundar Pichai -- Chief Executive Officer

On desktop search, to answer your question on the user experience on desktop search, how do we see improvements? Look, the same. First of all, users are having a cross-device experiences, cross-screen experiences, right? I think your desktop search experience, mobile search, everything goes hand-in-hand. All the work we're doing to make mobile search better translates to desktop search as well. Areas where desktop search historically has been a bit behind, is in terms of things like identity, and payments, and having all of that work well to enhance the user experience. And with Chrome now, we are investing a lot in those areas this fall and I think that will contribute overall the improvements there.

On YouTube, there are many areas we are focused on YouTube. They're always very focused on making sure they are supporting emerging formats, be it mobile live streaming or emerging formats like VR. And so that's an area of focus for us. We're also really looking at what are all new modernization options for creators beyond advertising. So be it subscriptions, features like SuperChat which we have launched our very popular. We have beta testing, sponsorships, merchandise, merchandising, and concert ticketing, etc. These are all areas by which we are improving.

Obviously, there are additional areas like music and YouTube TV, which are seeing great momentum as well.

Brian Nowak -- Morgan Stanley & Co. LLC -- Analyst

Great, thanks.

Operator

Thank you. Our next question comes from Ross Sandler of Barclays. Your line is now open.

Ross Sandler -- Barclays Capital Inc. -- Analyst

Just two questions, please. America's revenue accelerated nicely on a currency neutral basis. That's a geography that rarely comes up on the call. So any color about what's driving that acceleration and the sustainability of what's going on in the Americas region? And then, Ruth, a question on Sites' TAC. So I know you said the pace of deleverage is going to start to improve next quarter. Is this something that we should expect to happen for a year and then kind of normalize back to a pretty steady pace of deleverage or are we over some critical threshold and we should see this trend of moderating deleverage continue for several years into the future? Thanks.

Ruth Porat -- Chief Financial Officer

So on your first question, other Americas, I would say like the other regions, really pleased with the strength we have across the region. This is obviously one of the smaller ones. So, growing at a slightly faster clip and really pleased with the broad strength there. It starts with the Sites' revenue strength, but on top of that they benefited from hardware devices launching in some additional markets over the past year.

In terms of TAC, I would say there's not much to add to what we've already said after a kind of sustained period of stronger increases. We were pleased last quarter to be able to signal that this quarter that pace of change is slowing and you know I'll just leave it at that for now.

Operator

Thank you. Our next question comes from Anthony DiClemente of Evercore. Your line is now open.

Anthony DiClemente -- Evercore -- Analyst

Thank you for taking my questions. I have two -- one for Ruth and one for Sundar. Ruth, on CapEx, even if we exclude the Chelsea Market one timer, the growth in CapEx is really substantial, even on a kind of recurring basis. Should we expect that sort of dramatic growth or step-up in the growth of rate in ongoing CapEx to continue throughout the year? Or other than the Chelsea Market one timer, was there any reason to think that it was timing in terms of the timing front-end weighted into the first quarter for CapEx? And then secondly, on Sundar, just a question on YouTube and your media strategy at a higher level. In view of the success of other competitive subscription TV products out there, internet video products, could you just talk about YouTube Red and any thoughts on ways you can accelerate growth for your YouTube subscription video products? Whether that be organic investment in content, original production or even via acquisition? Thank you.

Ruth Porat -- Chief Financial Officer

So in terms of CapEx, it's about equally split between facilities and our technical infrastructure. As you know, we had the $2.4 billion purchase in New York, as well as this continued ground-up development projects. Facilities does tend to be lumpier over time. We are continuing with the ground-up development projects. As a reminder, we do favor owning rather than leasing real estate when we see good opportunities and that has served us well over the years. But I think more to your question with respect to technical infrastructure, that reflects investments in compute power to support growth that we see across Google.

The largest component is on machines. It's also on data centers and undersea cables. On machines, the biggest contributor is the demand that we're seeing. In particular, it's the expanding application of the machine learning efforts across Alphabet, plus the requirements for Cloud and Search and YouTube. Then secondarily, the increased cost of newer technologies. CPUs, memory, network. So, I think really to answer your question most directly, it reflects the demand that we're seeing. I wouldn't want to suggest a one-off in terms of the investments we're making in technical infrastructure.

Then in terms of the data centers, we are investing globally. We currently have over 20 sites on four continents. That's under differing stages of construction, as Sundar noted. It's across the U.S., Tennessee, Alabama, South Carolina. Iowa. So, we're really building out to support the growth that we're seeing.

Anthony DiClemente -- Evercore -- Analyst

Thank you.

Operator

Thank you. And our --

Sundar Pichai -- Chief Executive Officer

Sorry. On the second question on YouTube, for sure, the adoption and feedback across both YouTube Red and YouTube Music has been great to see. We are doing a lot more work there. You will see us continuing this further developing those offerings better and, s part of that, further drive adoption. So for example YouTube originals end up playing a big part in YouTube Red subscriptions. So far, we've launched in a handful of markets and will continue to roll it out to more markets there. On YouTube Music, we are working on enhancing the product and I think there's definitely great opportunities there as well.

Anthony DiClemente -- Evercore -- Analyst

Okay. Thank you very much.

Operator

Thank you and our next question comes from Dan Salmon of BMO Capital Markets. Your line is now open.

Daniel Salmon -- BMO Capital Markets -- Analyst

Hi, good afternoon, everyone. Sundar, I had two for you. First, during the quarter, there were some reports of changes in leadership at your Search and AI divisions. Functionally, it sounds like separating leadership over those two very large, important businesses for the company. Could you talk a little bit more about that and how that may impact broader strategy for the company? And then, second, a little bit more tactical one on your advertising business you launched -- shopping actions during the quarter with a pay-per-sale pricing model. And I was just curious to hear what type of feedback you were getting from advertisers that led to a product with that pricing model in particular or any other features of shopping actions that you think are important to highlight.

Sundar Pichai -- Chief Executive Officer

Thanks, Dan. Obviously, search has been leading the company in terms of how they have been adopting machine learning and AI and it's really working well through Search and Assistant. We sense, obviously as an AI-first company, AI cuts across everything we do in Google. As an organization, it's a horizontal organization which needs to serve all our areas in some ways the change reflects that. We have very capable leaders. Jeff, who was the founder of Google Brain really well-positioned to lead our AI efforts. Ben has been at Google since the early days of Search. He started at Google in 2000, and has been driving Search for 18 years. And so we are very excited and we think the changes will serve the company well.

On your second thing, the question was on shopping actions?

Daniel Salmon -- BMO Capital Markets -- Analyst

And in particular, the price-per-sale-pricing model.

Sundar Pichai -- Chief Executive Officer

I think we you know we announced this new service in March. The feedback has been very positive. I mentioned earlier, which is for retailers when they are testing this, they see drives in increase in basket size, so that means users are interacting with the product well and you know that's all I have to share for now. It's still early days OK.

Daniel Salmon -- BMO Capital Markets -- Analyst

Okay, great. Thank you.

Operator

Thank you. And our next question comes from Colin Sebastian of Robert Baird. Your line is now open.

Colin Alan Sebastian -- Robert W. Baird & Co., Inc. -- Analyst

A couple for me, please. First, on the cloud business. I was wondering if you could provide any color, at least on the relative momentum you are seeing in that segment from infrastructure services compared to platform or software services. And then related to the adoption of AMP, I guess, a key question we get asked is whether that ultimately changes usage and maybe you have some perspective on this from Android but in the ecosystem between mobile web pages and app usage if you're seeing any shift among users between those formats? Thank you.

Sundar Pichai -- Chief Executive Officer

On the first question of Cloud, look, I think the main thing I would say is the fundamental drivers of adoption of Google Cloud, based on what we hear back from customers, is our advantage in data analytics and machine learning. The fact that we really supported open agile [inaudible] [01:14:07]. Kubernetes has literally become the standard for workloads and the fact that we are open in terms of how we approach the space. Security is becoming a big differentiator for us and something we have been leading for a while, and I think that's driving it. G Suite, I called out earlier, is a good synergistic driver. G Suite is doing well and clearly a very unique offering and it's gotten very comprehensive. And so I think overall, it comes together well.

On your second question around on AMP, AMP has been definitely very successful. It's really made publisher content much more friendly for users in terms of latency and the user experience. And hence, that option has been great. For sure, AMP has definitely helped the mobile web and that's part of the big reasons we did it. Mobile web is still a big part of how users consume content, especially around news. Us investing there clearly makes a difference. I guess, for example, when we look at J.Crew adopted AMP. Their mobile page loading times are now 90 percent faster, and now they are integrating the Google payment request API. That releases checkout times from 2 minutes to 30 seconds or so. So things like that, we're going to constantly stay on improving the mobile web, and that plays a big part in how our ecosystem works.

Colin Alan Sebastian -- Robert W. Baird & Co., Inc. -- Analyst

Thank you.

Operator

Thank you. And our next question comes from Michael Nathanson of MoffettNathanson. Your line is now open.

Michael B. Nathanson -- MoffettNathanson LLC -- Analyst

Thank you. I have two; one for Sundar, one for Ruth. Firstly, Sunday. Can you give us any sense of how Google Home consumers are using search on these devices differently than the traditional ways of search? Are you finding in those homes, is it additive to the overall search activity? And then for Ruth, if you look at the last page the press release, where you've shown the new monetization metrics, you see a real increase in the cost per impression on network sites. So can you talk about maybe what's happening there? Is there a mixed-shift types of publishers, types of products or is that just market inflation? Thanks.

Sundar Pichai -- Chief Executive Officer

For sure. In a Google Home, gives rise to a lot of new and unique use cases. Actions are a big part of it. "Call Mom" is a good example of something you say to Google Home a lot, which is different than what you would say to Search. We see this as a you know good, complementary thing. You will see Search embrace some of the capabilities you find in Google Assistant and Google Home and vice versa. Overall, I view this as additive in the long-term and we are definitely just getting started there.

Ruth Porat -- Chief Financial Officer

And then on the network monetization trends. First, just to give people a bit more color, when we launch the AdSense businesses, our network revenues were largely click-based and over time, there's been a meaningful mix change in our business given the strong growth in Programmatic, which is impression based. So, as a result, this shift now covers more of the business. And then in terms of the question on impression growth versus CPM growth, as we've discussed on prior calls, the network business is actually a number of different businesses.

And then within that we had flat year-on-year growth in the number of impressions that was driven by efforts to improve user experience through a reduction of less relevant ads and AFC. And so, these changes had a positive impact on the year-on-year growth in CPMs. And then the trend in impressions and CPMs can clearly be volatile from quarter to quarter as we're optimizing for the user, publisher, and advertiser, but it really goes to the efforts that we made.

Michael B. Nathanson -- MoffettNathanson LLC -- Analyst

Thanks, Ruth and Sundar.

Operator

Thank you. Our next question comes from Brent Thill of Jefferies. Your line is now open.

Brent Thill -- Jefferies LLC -- Analyst

Good afternoon. Just as a question regarding any changes on your framework for growth versus operating margins, in the last few quarters you've seen steady topline acceleration, yet the margins were down. Can you just talk about how you think about it at a high level for this year? Any changes from the past.

Ruth Porat -- Chief Financial Officer

Yeah it's an important question. As we've talked about on many calls, we have been and remain focused on supporting long term revenue and profit growth. We think the opportunity set ahead of us is quite extraordinary. As I said in opening comments, just given our confidence as we're looking forward, we want to make sure we're investing appropriately in the next phase of innovation and we have clarity about some very compelling opportunities, and in our judgment, that enables us to maximize shareholder value. So, we're taking the steps really to put in place the support for long-term, longer term growth.

Part of what I'm saying you can see in our Sites revenue growth. I tried to make that clear in opening comments that we see this consistent, strong momentum globally and we're really excited about the still sizable opportunity led by mobile search. We're continuing to invest to enhance the user and advertiser experience and thereby extend the growth in our ads business. You can see this also in the trend on CapEx spend as I noted in our opening comments.

The investments we're making there really provide the compute capacity to support our growth outlook and that's supporting the opportunities that come out of machine learning and the Assistant. And then we also see extraordinary upside in the newer markets, as Sundar has talked about, most notably cloud computing and hardware. And so we're investing to support the long-term growth opportunity there.

And then finally, when we look at the market opportunity in both self-driving cars and life sciences, our judgment is it makes sense to place the kinds of investments that we are and with all of this, what also hasn't changed is we appreciate the importance of prioritization and picking our spots. We're keenly focused on steps we can take to both make the right investments with the proper intensity while being diligent about long-term plans in return. So at a high level, the approach hasn't changed. You're seeing the investments here.

Brent Thill -- Jefferies LLC -- Analyst

Thank you.

Operator

Thank you. Our final question comes from one of Stephen Ju of Credit Suisse. Your line is now open.

Stephen Ju -- Credit Suisse Securities, LLC -- Analyst

Thank you. Sundar, I think one of the themes that you as a management team has talked about has been to democratize advertising with AI to help SMBs who may have found advertising across Google's ad products to be perhaps overwhelming. Can you talk about the rate of uptake among the smaller advertisers and whether or not this is helping to catalyze growth in new budgets and where these guys might otherwise have not been able to advertise before? There's SMBs and then there's local also. So what will be the plan to get this technology into the hands of folks who will want to use them? Thank you.

Sundar Pichai -- Chief Executive Officer

Look, this is a big focus for us. Today, SMBs play a big role in our ecosystem. We are doing a lot of stuff to support them across the board. From things like in our offerings to help SMBs get an online presence, create a web site, be discovered in local search and Google Maps. So, we do a lot of detailed work to make sure SMBs are working well. We are also doing a lot of stuff on local, as well, including efforts even around local services. So, we have very specific initiatives. This is going to be, I mean, actually to us it's the bread and butter of what we do here, so there's a lot of effort under way. Not to mention the fact that we provide G Suite for businesses as they scale up as well. So, it's an end to an offering and you'll continue to see us invest more here.

Stephen Ju -- Credit Suisse Securities, LLC -- Analyst

Thank you."""

def _split_into_clean_sentences(text):
    """Split ``text`` into sentences, then strip punctuation from each one.

    The split has to happen BEFORE punctuation is removed: once the periods
    are gone there is nothing left to split on and the whole text collapses
    into a single "sentence".

    Each line of ``text`` is handled separately, so a line without a closing
    period (for example a speaker heading) becomes its own entry instead of
    being glued onto the following sentence. Within a line, a sentence ends at
    '.', '!' or '?' followed by whitespace, so decimals such as "2.4" stay
    intact.

    Limitation: abbreviations followed by a space (e.g. "U.S. and") are still
    treated as sentence ends.

    Parameters
    ----------
    text : str
        Raw text to process.

    Returns
    -------
    list of str
        Non-empty sentences with punctuation removed and runs of whitespace
        collapsed to single spaces.
    """
    cleaned_sentences = []
    for line in text.splitlines():
        # The lookbehind keeps the terminator attached to the sentence it ends;
        # it is removed together with the rest of the punctuation below.
        for piece in re.split(r'(?<=[.!?])\s+', line.strip()):
            cleaned = re.sub(r'[^\w\s]', '', piece)         # Remove punctuation
            cleaned = re.sub(r'\s+', ' ', cleaned).strip()  # Remove extra whitespace
            if cleaned:
                cleaned_sentences.append(cleaned)
    return cleaned_sentences


# Split the messy text into cleaned sentences
sentences = _split_into_clean_sentences(messy_text)

# Punctuation-free version of the whole text, as one string
clean_text = ' '.join(sentences)

# Convert the sentences into a JSON structure
json_data = {"sentences": sentences}

# Write the JSON data to a file; the encoding is explicit so the result does
# not depend on the platform default.
with open("cleaned_data.json", "w", encoding="utf-8") as json_file:
    json.dump(json_data, json_file, indent=4)

print("JSON file written successfully!")


JSON file written successfully!
